Skip to content
Snippets Groups Projects
Unverified Commit 2fb4a208 authored by BARBIER Marc's avatar BARBIER Marc
Browse files

blank label support for fpof and better error message for pbad

parent 89e82ff5
No related branches found
No related tags found
No related merge requests found
......@@ -113,6 +113,12 @@ public class RunPBADEmbeddingOnly {
status = CommandLineUtils.runCommandInUserDir(command, log, 10);
if(status != 0) {
String logStr = IOUtils.readFileFlat(log);
if(logStr.contains("Expecting column label")) {
throw new RuntimeException("The data is not labelled you need a column named 'label'");
}
System.err.println("Error running PBAD. Log:");
System.err.println(logStr);
System.err.println(usagePBADInstall);
......
......@@ -196,15 +196,19 @@ public class PatternMiningController {
}
private Map<Integer, Integer> getWindowLabels(File itemFile) throws IOException {
// timestamp,value,label,Window
Table window2label = MakePatternOccurrences.groupByWindow(itemFile,0,Integer.MAX_VALUE);
Map<Integer,Integer> window2LabelMap = new HashMap<>();
// timestamp,value,label,Window
List<String> columns = window2label.getRows().get(0);
int windowIdx = columns.indexOf("Window");
int labelIdx = columns.indexOf("label");
for(List<String> row: window2label.getRowsStartingFrom1()) {
if (labelIdx == -1) {
window2LabelMap.put(Integer.valueOf(row.get(windowIdx)), -1);
} else {
List<String> labels = Arrays.asList(row.get(labelIdx).split(";"));
int label = -1;
//TODO: dangerous approximation
if(labels.contains("1.0")) {
label = 1;
}
......@@ -216,6 +220,7 @@ public class PatternMiningController {
}
window2LabelMap.put(Integer.valueOf(row.get(windowIdx)), label); //window->label
}
}
return window2LabelMap;
}
......@@ -537,7 +542,7 @@ public class PatternMiningController {
File patternOccFile = new File(Settings.FILE_FOLDER + patternSet.get().getFilenameOccurrences());
Table occ = CSVUtils.loadCSV(patternOccFile);
for(int i=1; i<occ.getRows().size(); i++) {
int windowId = Integer.valueOf(occ.getRows().get(i).get(0));
int windowId = Integer.parseInt(occ.getRows().get(i).get(0));
//int patternId = Integer.valueOf(occ.getRows().get(i).get(1));
countsPerWindow.add(windowId);
if(i == 1) {
......@@ -631,8 +636,7 @@ public class PatternMiningController {
Table scores = CSVUtils.loadCSV(scoreFile);
scores.getRows().get(0).add("Label");
for(List<String> row: scores.getRowsStartingFrom1()) {
Integer window = Integer.valueOf(row.get(0));
Double score = Double.valueOf(row.get(1));
int window = Integer.parseInt(row.get(0));
Integer label = window2LabelMap.get(window);
row.add(String.valueOf(label));
}
......@@ -673,7 +677,7 @@ public class PatternMiningController {
//AP: 0.259
//<Finished took 1.04 seconds
Map<String,Double> evalMap = new HashMap<String,Double>();
Map<String,Double> evalMap = new HashMap<>();
try {
Double auc = Double.valueOf(logStr.get(logStr.size()-3).split(":")[1]);
Double ap = Double.valueOf(logStr.get(logStr.size()-2).split(":")[1]);
......
#!/bin/python3
"""
Compute evaluation scores in python
authors: Len Feremans
created: July 2019
Used for integration with TIPM: A tool voor Interactive time series
Used for integration with TIPM: A tool for Interactive time series
pattern mining and anomaly detection. (https://bitbucket.org/len_feremans/tipm_pub).
Compute evaluation scores in Python, since Java libraries for computing those are harder to find/use?
"""
import sys,os
import sys
import os
import pandas as pd
import numpy as np
import sklearn
from sklearn.metrics import roc_auc_score, average_precision_score
def has_multiple_values(data_block) -> bool:
    """Tell whether a sequence contains at least two distinct values.

    Every element is compared against the first element; the sequence has
    multiple values as soon as one element differs from it.

    Args:
        data_block: an indexable, sized sequence (e.g. a list or a 1-D numpy
            array) of values that support ``!=``.

    Returns:
        True if some element differs from the first one, False otherwise.
        An empty sequence has no values at all, so it yields False instead
        of raising IndexError.
    """
    # Guard first: indexing element 0 of an empty sequence would raise.
    if len(data_block) == 0:
        return False
    first = data_block[0]
    return any(val != first for val in data_block)
if __name__ == '__main__':
# get file argument
arguments = sys.argv
......@@ -27,7 +38,7 @@ if __name__ == '__main__':
df = pd.read_csv(score_and_labels_file)
if list(df.columns.values) != ["Window", "Score", "Label"]:
print("Error: expecting [Window,Score,Label] as columns")
print(cols)
print(df.columns.values)
sys.exit(-1)
# compute evaluation score
windows = df['Window'].values
......@@ -37,7 +48,10 @@ if __name__ == '__main__':
scores = 1.0 - scores
window_labels = df['Label'].values
ixl = np.where(window_labels != 0)[0]
auc = roc_auc_score(y_true=window_labels[ixl], y_score=scores[ixl])
# ROC AUC cannot be computed when the labels hold a single value (e.g. the labels were never filled in), so fall back to 0 in that case
auc = roc_auc_score(y_true=window_labels[ixl], y_score=scores[ixl]) if has_multiple_values(window_labels) else 0
ap = average_precision_score(y_true=window_labels[ixl], y_score=scores[ixl])
print("AUC: {:.3f}".format(auc))
print("AP: {:.3f}".format(ap))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment