Skip to content
Snippets Groups Projects
Unverified Commit 07c8b8ad authored by BARBIER Marc's avatar BARBIER Marc
Browse files

Bugfixes and xstream update

I updated XStream for better error handling, since some modifications I made caused some code to fail to load and the errors weren't clear.

I fixed a bug I introduced in previous commits, in which the saving of patterns wasn't working properly if you were using the routes in a sessionless way.
parent 8dbb8003
Branches
No related tags found
No related merge requests found
......@@ -92,16 +92,10 @@
<version>3.6.11</version>
</dependency>
<dependency>
<groupId>xstream</groupId>
<groupId>com.thoughtworks.xstream</groupId>
<artifactId>xstream</artifactId>
<version>1.2.2</version>
<version>1.4.19</version>
</dependency>
<!--
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-jetty</artifactId>
</dependency>
-->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
......
......@@ -8,23 +8,20 @@ import be.uantwerpen.ldataminining.utils.CollectionUtils;
import be.uantwerpen.ldataminining.utils.CommandLineUtils;
import be.uantwerpen.ldataminining.utils.IOUtils;
import be.uantwerpen.mime_webapp.Settings;
import ca.pfv.spmf.algorithmmanager.AlgorithmManager;
import ca.pfv.spmf.algorithmmanager.DescriptionOfAlgorithm;
public class MineUsingSPMF {
public final class MineUsingSPMF {
private MineUsingSPMF(){}
public static File runItemsetMining(File arff, List<String> columns, String algorithm, double support) throws Exception {
//check if algorithm exists
if(!checkAlgorithmExists(algorithm, true))
return null;
Pair<File, File> dictFile_transFile_pair = ArffToSPMF.arffToSPMFTransactionalDatabase(arff, columns);
//java -jar spmf.jar run Apriori contextPasquier99.txt output.txt 40%
File log = new File("./temp/pattern_mining_" + arff.getName() + ".log");
File outputRaw = new File("./temp/" + algorithm + "_out_raw_" + arff.getName() + ".txt");
CommandLineUtils.runCommandInUserDir(new String[]{"java", "-Xmx" + Settings.SMPF_MAX_RAM_ALLOCATION, "-jar", new File(Settings.SPMF_JAR).getAbsolutePath(), "run", algorithm,
String[] command = new String[]{"java", "-Xmx" + Settings.SMPF_MAX_RAM_ALLOCATION, "-jar", new File(Settings.SPMF_JAR).getAbsolutePath(), "run", algorithm,
dictFile_transFile_pair.getSecond().getAbsolutePath(),
outputRaw.getAbsolutePath(), "" + support + "%"}, log,
Settings.SMPF_TIMEOUT);
outputRaw.getAbsolutePath(), "" + support + "%"};
CommandLineUtils.runCommandInUserDir(command, log, Settings.SMPF_TIMEOUT);
IOUtils.printHead(log, 100);
String logStr = IOUtils.readFileFlat(log);
if(logStr.contains("Exception")) {
......@@ -47,9 +44,6 @@ public class MineUsingSPMF {
}
public static File runSequentialPatternMining(File arff, List<String> columns, String algorithm, double support) throws Exception {
//check if algorithm exists
if(!checkAlgorithmExists(algorithm, false))
return null;
Pair<File,File> dictFile_transFile_pair = ArffToSPMF.arffToSPMFSequenceDatabase(arff, columns);
//java -jar spmf.jar run Apriori contextPasquier99.txt output.txt 40%
File log = new File("./temp/pattern_mining_" + arff.getName() + ".log");
......@@ -79,29 +73,4 @@ public class MineUsingSPMF {
}
return outputReadable;
}
/**
 * Verifies that an SPMF algorithm exists and belongs to the expected category.
 *
 * <p>Looks the algorithm up through {@code AlgorithmManager}, prints its name, category,
 * author names and parameter descriptions to standard output, and then compares its
 * category against the one implied by {@code itemsets}.
 *
 * @param algorithm name of the algorithm to look up
 * @param itemsets {@code true} to require the category "FREQUENT ITEMSET MINING",
 *                 {@code false} to require "SEQUENTIAL PATTERN MINING"
 * @return {@code true} when the algorithm exists and has the required category;
 *         this method never returns {@code false}, every failure is reported by exception
 * @throws RuntimeException if the algorithm is unknown, has the wrong category, or the
 *                          lookup itself fails (the original failure is kept as the cause)
 */
private static boolean checkAlgorithmExists(String algorithm, boolean itemsets) {
    try {
        DescriptionOfAlgorithm description = AlgorithmManager.getInstance().getDescriptionOfAlgorithm(algorithm);
        if (description == null) {
            throw new RuntimeException(String.format("Algorithm %s not found in SPMF", algorithm));
        }
        System.out.println("Found:" + description.getName() + " - " + description.getAlgorithmCategory() + " (" + description.getImplementationAuthorNames() + ")");
        for (int i = 0; i < description.getParametersDescription().length; i++) {
            System.out.println(" Parameter:" + description.getParametersDescription()[i].name + " " + description.getParametersDescription()[i].example);
        }
        if (itemsets && !description.getAlgorithmCategory().equals("FREQUENT ITEMSET MINING")) {
            throw new RuntimeException(String.format("Algorithm %s is not for FREQUENT ITEMSET mining, but %s", algorithm, description.getAlgorithmCategory()));
        }
        if (!itemsets && !description.getAlgorithmCategory().equals("SEQUENTIAL PATTERN MINING")) {
            throw new RuntimeException(String.format("Algorithm %s is not for SEQUENTIAL PATTERN mining, but %s", algorithm, description.getAlgorithmCategory()));
        }
        return true;
    } catch (RuntimeException e) {
        // Already unchecked (including the validation errors thrown above): rethrow as-is
        // so the descriptive message is not buried inside a second RuntimeException.
        throw e;
    } catch (Exception e) {
        // Checked failures from the SPMF lookup are converted to unchecked, keeping the cause.
        throw new RuntimeException(e);
    }
}
}
......@@ -71,8 +71,9 @@ public class PatternMiningController extends AbstractController{
outputReadable = MineUsingSPMF.runSequentialPatternMining(data, Arrays.asList(columnsArr), algorithm, supportAsFloat);
}
savePatterns(request, id, data, outputReadable, "sequential patterns", columns, algorithm, support);
File occFile = storePatternSetOccurrences(id, outputReadable.getName(),request);
Project newProject = savePatterns(request, id, data, outputReadable, "sequential patterns", columns, algorithm, support);
final FileItem newInput = repository.getLatestItem(newProject, currentInput.getLogicalName());
File occFile = storePatternSetOccurrences(newInput, newProject, outputReadable.getName());
savePatternsWithMetadata(outputReadable, occFile, data);
return String.format("Found %d patterns.",IOUtils.countLines(outputReadable)-1);
}
......@@ -117,16 +118,15 @@ public class PatternMiningController extends AbstractController{
return runMining(columns, request, id, support, algorithm, isItemset);
}
private File storePatternSetOccurrences(Optional<String> id, String patternFilename, HttpServletRequest request) throws Exception
{
//get current input
MySession mySession = (MySession) request.getSession().getAttribute("mySession");
Project projectObj = repository.findByName(mySession.getCurrentProject());
FileItem currentInput = getCurrentItem(request, id);
/**
 * Convenience overload that resolves the project and the latest item version itself.
 *
 * <p>Finds the project that owns {@code currentInput}, asks the repository for the latest
 * item in that project sharing its logical name, and delegates to the three-argument
 * overload with that item.
 *
 * @param currentInput item whose owning project and logical name drive the lookup
 * @param outputReadableName passed through unchanged as the pattern filename
 * @return whatever the three-argument overload returns
 * @throws IOException propagated from the three-argument overload
 */
private File storePatternSetOccurrences(FileItem currentInput, String outputReadableName) throws IOException {
    final Project owner = repository.findProjectByItem(currentInput);
    final String logicalName = currentInput.getLogicalName();
    // NOTE(review): neither lookup result is null-checked here; confirm the repository contract.
    return storePatternSetOccurrences(repository.getLatestItem(owner, logicalName), owner, outputReadableName);
}
if(!currentInput.isArff())
throw new RuntimeException("Only Arff supported");
private File storePatternSetOccurrences(FileItem currentInput, Project project, String patternFilename) throws IOException
{
//get pattern set
PatternSet set = null;
for(PatternSet patternSet: currentInput.getPatterns()) {
......@@ -147,12 +147,12 @@ public class PatternMiningController extends AbstractController{
table.addRow(Arrays.asList(String.valueOf(window),String.valueOf(patternId)));
}
}
File occurencesFile = new File(Settings.FILE_FOLDER + set.getFilename() + "_occurrences.csv");
CSVUtils.saveTable(table, occurencesFile);
File occurrencesFile = new File(Settings.FILE_FOLDER + set.getFilename() + "_occurrences.csv");
CSVUtils.saveTable(table, occurrencesFile);
//save projects.xml
set.setFilenameOccurrences(occurencesFile.getName());
repository.saveProject(projectObj);
return occurencesFile;
set.setFilenameOccurrences(occurrencesFile.getName());
repository.saveProject(project);
return occurrencesFile;
}
private void savePatternsWithMetadata(File patternsFile, File occurrencesFile, File itemFile) throws Exception {
......@@ -269,21 +269,26 @@ public class PatternMiningController extends AbstractController{
public @ResponseBody void filterPatternsOnLength(
@RequestParam("id") Optional<String> id,
@RequestParam("filename") String filename,
@RequestParam("minlen") String minlen,
@RequestParam("maxlen") String maxlen,
@RequestParam("minlen") String minLen,
@RequestParam("maxlen") String maxLen,
HttpServletRequest request) throws Exception
{
//1. get input data
FileItem currentItem = getCurrentItem(request, id);
Optional<PatternSet> patternSet = currentItem.getPatterns().stream().filter((x) -> x.getFilename().equals(filename)).findFirst();
if(!patternSet.isPresent())
throw new RuntimeException("patternSet not found");
int minlenInt = 1;
int maxlenInt = Integer.MAX_VALUE;
if(!minlen.isEmpty())
minlenInt = Integer.parseInt(minlen);
if(!maxlen.isEmpty())
maxlenInt = Integer.parseInt(maxlen);
int minLenInt = 1;
int maxLenInt = Integer.MAX_VALUE;
if(!minLen.isEmpty())
minLenInt = Integer.parseInt(minLen);
if(!maxLen.isEmpty())
maxLenInt = Integer.parseInt(maxLen);
//2. transform
File patternFile = new File(Settings.FILE_FOLDER + patternSet.get().getFilename());
Table patternsTable = CSVUtils.loadCSV(patternFile);
......@@ -296,7 +301,7 @@ public class PatternMiningController extends AbstractController{
String pattern = patternsTable.getRows().get(i).get(0);
String support = patternsTable.getRows().get(i).get(1);
int lenPattern = pattern.split(" ").length;
if(lenPattern >= minlenInt && lenPattern <= maxlenInt) {
if(lenPattern >= minLenInt && lenPattern <= maxLenInt) {
newTable.addRow(Arrays.asList(pattern,support));
}
}
......@@ -304,9 +309,10 @@ public class PatternMiningController extends AbstractController{
CSVUtils.saveTable(newTable, patternFile);
patternSet.get().setNoPatterns((long)newTable.getRows().size() -1);
currentItem.getStackOperations().add(String.format("Filter patterns(set=%s,minlen=%s,maxlen=%s)",
patternSet.get().getLabel(),minlen,maxlen));
patternSet.get().getLabel(),minLen,maxLen));
repository.save();
File occFile = storePatternSetOccurrences(id, patternFile.getName(),request);
File occFile = storePatternSetOccurrences(currentItem, patternFile.getName());
savePatternsWithMetadata(patternFile, occFile, currentItem.getFile());
}
......@@ -337,7 +343,8 @@ public class PatternMiningController extends AbstractController{
currentItem.getStackOperations().add(String.format("Filter support(set=%s,topK=%s)",
patternSet.get().getLabel(),topk));
repository.save();
storePatternSetOccurrences(id, patternFile.getName(),request);
storePatternSetOccurrences(currentItem, patternFile.getName());
}
//todo: Now only WITHIN 1 patternset, not between patternsets!
......@@ -415,7 +422,8 @@ public class PatternMiningController extends AbstractController{
currentItem.getStackOperations().add(String.format("Remove redundant(set=%s, jaccard_threshold=%s)",
patternSet.get().getLabel(), threshold));
repository.save();
File occFile = storePatternSetOccurrences(id, patternFile.getName(),request);
File occFile = storePatternSetOccurrences(currentItem, patternFile.getName());
savePatternsWithMetadata(patternFile, occFile, currentItem.getFile());
}
......@@ -709,10 +717,10 @@ public class PatternMiningController extends AbstractController{
}
}
private void savePatterns(HttpServletRequest request, Optional<String> id, File arffFile, File patternFile, String type, String columns, String algorithm, String support){
private Project savePatterns(HttpServletRequest request, Optional<String> id, File arffFile, File patternFile, String type, String columns, String algorithm, String support){
try {
FileItem currentItem = getCurrentItem(request, id);
Project projectObj = repository.findProjectByItem(currentItem);
Project project = repository.findProjectByItem(currentItem);
//make pattern set object
PatternSet patternSet = new PatternSet();
patternSet.setColumns(columns);
......@@ -727,12 +735,13 @@ public class PatternMiningController extends AbstractController{
item.add(patternSet);
String transform = "Mine " + (type.equals("itemsets")?"itemsets":"sequential patterns") + "(" + columns + ", " + algorithm + ", " + support + "%)";
item.getStackOperations().add(transform);
projectObj.add(item);
repository.saveProject(projectObj);
project.add(item);
repository.saveProject(project);
//set current session
if(request.getSession() != null) {
request.getSession().setAttribute("mySession", new MySession(projectObj.getName(), item.getId()));
request.getSession().setAttribute("mySession", new MySession(project.getName(), item.getId()));
}
return project;
} catch (Exception e) {
throw new RuntimeException(e);
}
......
......@@ -12,6 +12,7 @@ import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import com.thoughtworks.xstream.XStream;
import com.thoughtworks.xstream.security.AnyTypePermission;
import be.uantwerpen.mime_webapp.Settings;
import be.uantwerpen.mime_webapp.model.FileItem;
......@@ -144,6 +145,7 @@ public class ProjectRepository {
xstream.alias("project", Project.class);
xstream.alias("item", FileItem.class);
xstream.alias("patterns", PatternSet.class);
xstream.addPermission(AnyTypePermission.ANY);
return xstream;
}
......@@ -158,4 +160,16 @@ public class ProjectRepository {
return null;
}
/**
 * Returns the item with the highest version among those in {@code project} whose
 * logical name equals {@code logicalName}.
 *
 * <p>When several matching items share the highest version, the first one encountered
 * in {@code project.getFileItems()} is kept.
 *
 * @param project project whose file items are scanned
 * @param logicalName logical name an item must have to be considered
 * @return the matching item with the greatest version, or {@code null} if none matches
 */
public FileItem getLatestItem(Project project, String logicalName) {
    //TODO: check if the list is sorted if it is this search can be greatly simplified
    FileItem newest = null;
    for (FileItem candidate : project.getFileItems()) {
        if (!candidate.getLogicalName().equals(logicalName)) {
            continue;
        }
        // Strictly-greater comparison: an equal version never displaces the current pick.
        if (newest == null || candidate.getVersion() > newest.getVersion()) {
            newest = candidate;
        }
    }
    return newest;
}
}
......@@ -14,16 +14,17 @@ public class FileItem implements Serializable{
private String logicalName;
private int version=0;
//id used for serialization and storage
private String id;
private List<String> stackOperations = new ArrayList<String>();
private List<String> stackOperations = new ArrayList<>();
private String filename;
private Long noColumns;
private Long noRows;
private List<PatternSet> patterns = new ArrayList<PatternSet>();
private List<AnomalyScores> scores = new ArrayList<AnomalyScores>();
private List<PatternSet> patterns = new ArrayList<>();
private List<AnomalyScores> scores = new ArrayList<>();
//back link to project, give 'infinite' loop error when saving
......@@ -36,9 +37,10 @@ public class FileItem implements Serializable{
item.setLogicalName(getLogicalName());
item.setFilename(getFilename());
item.setVersion(getVersion()+1);
item.setStackOperations(new ArrayList<String>(getStackOperations()));
item.setStackOperations(new ArrayList<>(getStackOperations()));
item.setPatterns(new ArrayList<>(getPatterns()));
item.setScores(new ArrayList<AnomalyScores>());
//TODO: check if this is correct (no copiing scores) if it is correct leave a comment
item.setScores(new ArrayList<>());
item.getId();
return item;
}
......@@ -129,9 +131,9 @@ public class FileItem implements Serializable{
return this.filename.toLowerCase().endsWith(".csv");
}
public String getId(){ //globally unique...
this.id = String.format("%s-%03d", logicalName, version);
return id; //e.g. iris-001
public String getId(){
id = String.format("%s-%03d", logicalName, version);
return id;
}
public File getFile(){
......
......@@ -14,11 +14,10 @@ public class MineUsingSPMFTest {
@Test
public void testRunItemsetMining() throws Exception {
File input = new File("./data/upload/ambient_temp-007.arff");
MineUsingSPMF service = new MineUsingSPMF();
String[] algorithms = new String[] {"Eclat", "Charm_bitset","Charm_MFI", "AprioriRare"};
List<String> cols = Arrays.asList("value");
for(String algo: algorithms) {
File outputPatterns = service.runItemsetMining(input, cols, algo, 5);
File outputPatterns = MineUsingSPMF.runItemsetMining(input, cols, algo, 5);
IOUtils.printHead(outputPatterns, 5);
IOUtils.printTail(outputPatterns, 5);
}
......@@ -27,11 +26,10 @@ public class MineUsingSPMFTest {
@Test
public void testRunSequentialPatternMining() throws Exception {
File input = new File("./data/upload/A10-028.arff");
MineUsingSPMF service = new MineUsingSPMF();
String[] algorithms = new String[] {"PrefixSpan", "CM-ClaSP","MaxSP"};
List<String> cols = Arrays.asList("unique_EVENT_E10");
for(String algo: algorithms) {
File outputPatterns = service.runSequentialPatternMining(input, cols, algo, 5);
File outputPatterns = MineUsingSPMF.runSequentialPatternMining(input, cols, algo, 5);
IOUtils.printHead(outputPatterns, 5);
IOUtils.printTail(outputPatterns, 5);
}
......
......@@ -37,8 +37,7 @@ public class TestMakePatternOccurrences {
@Test
public void testPatternOccurrencesItemsetRunSPMF() throws Exception {
File arff = new File("./data/upload/ambient_temp-010.arff");
MineUsingSPMF miner = new MineUsingSPMF();
File pattternFile = miner.runItemsetMining(arff, Arrays.asList("value","label"), "Apriori" , 10);
File pattternFile = MineUsingSPMF.runItemsetMining(arff, Arrays.asList("value","label"), "Apriori" , 10);
IOUtils.printHead(pattternFile);
PatternSet set = new PatternSet();
set.setColumns("value,label");
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment