diff --git a/.project b/.project
index 194410caf174073a69ebd9c12bcdf3acd7246b69..fb9d1f0078f3d9951617214196c3453b601a55eb 100644
--- a/.project
+++ b/.project
@@ -20,4 +20,15 @@
 		<nature>org.eclipse.jdt.core.javanature</nature>
 		<nature>org.python.pydev.pythonNature</nature>
 	</natures>
+	<filteredResources>
+		<filter>
+			<id>1655190855508</id>
+			<name></name>
+			<type>30</type>
+			<matcher>
+				<id>org.eclipse.core.resources.regexFilterMatcher</id>
+				<arguments>node_modules|.git|__CREATED_BY_JAVA_LANGUAGE_SERVER__</arguments>
+			</matcher>
+		</filter>
+	</filteredResources>
 </projectDescription>
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ea17282925783565b68de6315451b7001a63487b
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+pandas
+scikit-learn
+cython
diff --git a/src/main_TIPM.py b/src/main_TIPM.py
index c5897d82604669d8e370af32f644359a232e6289..fe3a3b35f91bf49ec452b0c3a1fbfc51c9ebaebc 100644
--- a/src/main_TIPM.py
+++ b/src/main_TIPM.py
@@ -4,35 +4,37 @@
 pattern-based anomaly detection
 authors: Len Feremans
 created: 8 May 2019
-Used for integration with TIPM: A tool voor Interactive time series
+Used for integration with TIPM: a tool for interactive time series
 pattern mining and anomaly detection (https://bitbucket.org/len_feremans/tipm_pub).
 For TIPM we run PBAD_Embed from the command line, which is PBAD without the preprocessing
 and pattern mining steps, since these are done by that tool.
 PBAD_Embed computes weighted occurrences and an isolation forest anomaly score.
 """
-import sys, os
+import sys
+import os
 import pandas as pd
 import numpy as np
 from methods.PBAD_Embed import PBAD_Embed
 from sklearn.metrics import roc_auc_score, average_precision_score
 from collections import defaultdict
 
-#Convert nested list of windows to 2d numpy array
-#Problem: if windows have different dimensions, np.array does not create matrix,
-#but list of objects.
-#Create matrix and pad windows with 0's if necessary
+
+# Convert a nested list of windows to a 2d numpy array.
+# Problem: if windows have different dimensions, np.array does not create a matrix,
+# but a list of objects.
+# Create a matrix and pad windows with 0's if necessary.
 def windows2numpy(listOfWindows):
     normal_length = len(listOfWindows[len(listOfWindows)//2])
-    listOfWindows2 = [];
+    listOfWindows2 = []
     for i in range(0, len(listOfWindows)):
         lst1 = listOfWindows[i]
-        lenLst1 = len(lst1) 
+        lenLst1 = len(lst1)
         if lenLst1 != normal_length:
             if lenLst1 > normal_length:
                 raise Exception("Length is higher than expected")
             else:
-                for i in range(0, normal_length - lenLst1):
+                for _ in range(0, normal_length - lenLst1):  # do not shadow the outer loop variable i
                     lst1.append(0.0)
-        for idx, val in enumerate(lst1): #bug in PBAD, called from TIPM, if empty values
+        for idx, val in enumerate(lst1):  # bug in PBAD, called from TIPM, if empty values
             if val == '?':
                 lst1[idx] = 0.0
         np_arr = np.array(lst1).astype(np.float64)
@@ -40,14 +42,15 @@ def windows2numpy(listOfWindows):
     np_arr = np.array(listOfWindows2)
-    print('Debug: windows2numpy: type {}, type(arr[0]) {}, type(arr[0][0]) {} shape {}, arr[0] {}'.format(
+    print('Debug: windows2numpy: type {}, type(arr[0]) {}, type(arr[0][0]) {} shape {}, arr[0][0] {}'.format(
         type(np_arr),
-        type(np_arr[0]), 
-        type(np_arr[0][0]), 
-        np_arr.shape, np_arr[i][0]))
+        type(np_arr[0]),
+        type(np_arr[0][0]),
+        np_arr.shape, np_arr[0][0]))
     return np_arr
 
+
 if __name__ == '__main__':
-    #parse arguments
+    # parse arguments
     usage = "main_TIPM -input CSVFILE -type all -columns pc1,pc2\n" + \
            "-itemset_fnames pc1_closed_item.txt,pc2_closed_item.txt\n" + \
            "-sequential_fnames pc1_closed_sp.txt,pc2_closed_sp.txt\n" + \
@@ -56,12 +59,13 @@ if __name__ == '__main__':
     print('Argument List:' + str(arguments))
     if '-?' in arguments:
         print(usage)
-        sys.exit(0) #normal end, for -? parameter
+        sys.exit(0)  # normal end, for -? parameter
+
     if not('-type' in arguments and '-columns' in arguments and '-input' in arguments
-           and ('-itemset_fnames' in arguments or '-sequential_fnames' in arguments)):
+            and ('-itemset_fnames' in arguments or '-sequential_fnames' in arguments)):
         print(usage)
         sys.exit(-1)
-    
+
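+    # Example invocation (illustrative only; placeholder names taken from the usage string above):
+    #   python main_TIPM.py -input CSVFILE -type all -columns pc1,pc2 \
+    #       -itemset_fnames pc1_closed_item.txt,pc2_closed_item.txt \
+    #       -sequential_fnames pc1_closed_sp.txt,pc2_closed_sp.txt -score_fname SCOREFILE
+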
     def get_argument(key):
         for idx, arg in enumerate(arguments):
             if arg.strip().lower() == key:
@@ -70,90 +74,93 @@ if __name__ == '__main__':
             else:
                 raise Exception("Illegal last argument. " + str(arguments))
         return None
-    inputfilename = get_argument('-input')
+
+    input_filename = get_argument('-input')
     pattern_type = get_argument('-type')
     columns = get_argument('-columns').lower().split(',')
     itemset_fnames = get_argument('-itemset_fnames')
     sequential_fnames = get_argument('-sequential_fnames')
     score_fname = get_argument('-score_fname')
-    #Validation command-line arguments
+    # Validation of command-line arguments
     # 1) Type is either all, itemset, sequential
-    # 2) Depending on type we expect either an file with either itemsets and/or sequential pattern for each column
-    if not pattern_type in ['all', 'itemset', 'sequential']:
-        print('Type not in ' + str(['all', 'itemset', 'sequential']));
+    # 2) Depending on the type we expect a file with itemsets and/or sequential patterns for each column
+    if pattern_type not in ['all', 'itemset', 'sequential']:
+        print('Type not in ' + str(['all', 'itemset', 'sequential']))
         print(usage)
         sys.exit(-1)
-    if not os.path.isfile(inputfilename):
-        print('input does not exist')
+    if not os.path.isfile(input_filename):
+        print('input does not exist')
         print(usage)
         sys.exit(-1)
-    if (pattern_type == 'all' or pattern_type=='itemset') and itemset_fnames == None:
-        print('Specify -itemset_fnames')
+    if (pattern_type == 'all' or pattern_type == 'itemset') and itemset_fnames is None:
+        print('Specify -itemset_fnames')
         print(usage)
         sys.exit(-1)
-    if (pattern_type == 'all' or pattern_type=='sequential') and sequential_fnames == None:
-        print('Specify -sequential_fnames')
+    if (pattern_type == 'all' or pattern_type == 'sequential') and sequential_fnames is None:
+        print('Specify -sequential_fnames')
         print(usage)
         sys.exit(-1)
     for fnames in [itemset_fnames, sequential_fnames]:
-        if fnames != None:
+        if fnames is not None:
             for idx, fname in enumerate(fnames.split(',')):
                 if not os.path.isfile(fname):
-                    print('pattern input does not exist ' + fname)
-                    print(usage)
-                    sys.exit(-1)
+                    print('pattern input does not exist ' + fname)
+                    print(usage)
+                    sys.exit(-1)
                 else:
                     f = open(fname, 'r')
                     l1 = f.readline().lower().split(',')
                     l2 = f.readline().lower().split(',')
                     print(str(idx) + ': Reading patterns ' + fname + ' for testing\n' + str(l1) + '\n' + str(l2))
-                    #print(' Associate column: ' + columns[idx])
-                    f.close()
-    
-    #Validation CSV file
+                    # print(' Associate column: ' + columns[idx])
+                    f.close()
+
+    # Validation of the CSV file
     # Assumes the CSV file has the following structure:
     # 1) First column is the timestamp/time step
     # 2) Label column is named "label"
     # 3) Window column is named "window"
     # 4) For each continuous time series with name X, the corresponding discretized column has name X_D
     # 5) Pattern sets are 1-dimensional
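+    # Illustrative header for such a CSV (hypothetical column names):
+    #   timestamp,pc1,pc1_d,window,label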
-    f = open(inputfilename, 'r')
+    f = open(input_filename, 'r')
     columns_csv = f.readline().lower().strip().split(',')
-    f.close() 
+    f.close()
     print('Reading CSVFile ' + str(columns_csv))
-    if not 'window' in columns_csv:
+    if 'window' not in columns_csv:
         print('Expecting column window')
-        sys.exit(-1)
-    if not 'label' in columns_csv:
+        sys.exit(-1)
+
+    if 'label' not in columns_csv:
         print('Expecting column label')
-        sys.exit(-1)
-    #If discrete column names are pased, fix this
-    columns = [col if not col.endswith('_d') else col[0:len(col)-2] for col in columns]
+        sys.exit(-1)
+
+    # If discretized column names are passed, strip the _d suffix
+    columns = [col if not col.endswith('_d') else col[0:len(col)-2] for col in columns]
     for col in columns:
-        if not col in columns_csv:
+        if col not in columns_csv:
             print('Expecting time series column ' + col)
-            sys.exit(-1)
-        if not col + '_d' in columns_csv:
+            sys.exit(-1)
+        if col + '_d' not in columns_csv:
             print('Expecting time series discretized column with name ' + col + '_d')
-            sys.exit(-1)
-    
-    #RUN
-    #preprocess: create windows for each continuous column, i.e. group by window column in TIPM
+            sys.exit(-1)
+
+    # RUN
+    # preprocess: create windows for each continuous column, i.e. group by the window column in TIPM
     # for labels create either 1 (anomaly) if 1 is in window, or -1 (good) if -1 in window and not 1, else 0
-    #Note: Doing this in plain-old python, instead of using more efficient numpy stuff
-    df = pd.read_csv(inputfilename, header=0, index_col=0)
+    # Note: doing this in plain old Python, instead of using more efficient numpy operations
+    df = pd.read_csv(input_filename, header=0, index_col=0)
     cols = [c.lower().strip() for c in list(df.columns.values)]
-    rows = df.values.tolist() 
+    rows = df.values.tolist()
     windowIdx = cols.index("window")
     labelIdx = cols.index("label")
     columnsIdx = [cols.index(col) for col in columns]
-    discrete_columnsIdx = [cols.index(col+'_d') for col in columns] 
+    discrete_columnsIdx = [cols.index(col+'_d') for col in columns]
     group_by_window = defaultdict(list)
     current_window = None
     windows = list()
     for row in rows:
         window = row[windowIdx]
-        if not window in windows:
+        if window not in windows:
             windows.append(window)
         group_by_window[window].append(row)
     windowed_labels = []
@@ -180,36 +187,40 @@ if __name__ == '__main__':
             windowed_series[i].append(series[i])
         for i in range(0, len(discrete_columnsIdx)):
             windowed_series_discrete[i].append(discrete_series[i])
-    #transform to datastructures for PBAD
-    window_labels=np.array(windowed_labels)
+    # transform to data structures for PBAD
+    window_labels = np.array(windowed_labels)
     continuous_data = {}
-    continuous_data_discretized={}
+    continuous_data_discretized = {}
    for i in range(0, len(columnsIdx)):
         continuous_data[i] = windows2numpy(windowed_series[i])
         continuous_data_discretized[i] = windows2numpy(windowed_series_discrete[i])
-    #cont_series = {0: data.iloc[:, 0].values}
-    #labels = data.iloc[:, 1].values
-    #cd_D, cd_UD, _, window_labels = preprocess(cont_series, labels=labels)
+    # cont_series = {0: data.iloc[:, 0].values}
+    # labels = data.iloc[:, 1].values
+    # cd_D, cd_UD, _, window_labels = preprocess(cont_series, labels=labels)
     # run PBAD
-    print('\nRunning PBAD Embed: This computes embedding of patterns, that is a weighted occurrences score for each pattern and each window,' + \
+    print('\nRunning PBAD Embed: This computes an embedding of patterns, that is a weighted occurrence score for each pattern and each window,' +
          'and then computes an anomaly score using isolation forests. Pattern sets must be provided.')
-    if itemset_fnames != None:
+
+    if itemset_fnames is not None:
         itemset_fnames = itemset_fnames.split(',')
-    if sequential_fnames != None:
+
+    if sequential_fnames is not None:
         sequential_fnames = sequential_fnames.split(',')
+
     detector = PBAD_Embed(pattern_type=pattern_type, itemset_filenames_cont=itemset_fnames, sp_filenames_cont=sequential_fnames)
     scores = detector.fit_predict(continuous_data_discretized, continuous_data)
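+    # Evaluate only on labeled windows: label 0 marks an unlabeled window (see the label aggregation above)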
     ixl = np.where(window_labels != 0)[0]
     auc = roc_auc_score(y_true=window_labels[ixl], y_score=scores[ixl])
     ap = average_precision_score(y_true=window_labels[ixl], y_score=scores[ixl])
     print("AUC: {:.3f}".format(auc))
-    print("AP: {:.3f}".format(ap)) 
-    #save score
-    if score_fname != None:
+    print("AP: {:.3f}".format(ap))
+
+    # save scores
+    if score_fname is not None:
         f = open(score_fname, 'w')
-        f.write("Window,Score\n") 
+        f.write("Window,Score\n")
         for idx, win in enumerate(windows):
             score = scores[idx]
-            f.write("{},{:.6f}\n".format(win,score))
-        f.close()
-        print("Saved {}".format(score_fname))
\ No newline at end of file
+            f.write("{},{:.6f}\n".format(win, score))
+        f.close()
+        print("Saved {}".format(score_fname))
diff --git a/src/tox.ini b/src/tox.ini
new file mode 100644
index 0000000000000000000000000000000000000000..cd32d45e9126f41a031ccca92f8078feaebbfea7
--- /dev/null
+++ b/src/tox.ini
@@ -0,0 +1,4 @@
+[flake8]
+max-line-length = 150
+exclude = .git,main.py
+docstring-convention = numpy
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000000000000000000000000000000000000..f8b1a82a6084039bda21dae51c9d8f39bb8e9ccd
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,4 @@
+[flake8]
+max-line-length = 150
+exclude = .git
+docstring-convention = numpy