Jean-Francois Leveque

Ajout du paramétrage du seuil d'annotation

...@@ -51,6 +51,7 @@ public class PreprocessingRunner implements ApplicationRunner { ...@@ -51,6 +51,7 @@ public class PreprocessingRunner implements ApplicationRunner {
51 private Boolean ratings; 51 private Boolean ratings;
52 52
53 private float annotatePercent; 53 private float annotatePercent;
54 + private int annotateThreshold;
54 55
55 @Override 56 @Override
56 public void run(ApplicationArguments applicationArguments) throws Exception { 57 public void run(ApplicationArguments applicationArguments) throws Exception {
...@@ -58,7 +59,7 @@ public class PreprocessingRunner implements ApplicationRunner { ...@@ -58,7 +59,7 @@ public class PreprocessingRunner implements ApplicationRunner {
58 setFilenames(); 59 setFilenames();
59 List<AssociationElement> associationElements = loadAssociationElements(new File(dataDir, completeFilename)); 60 List<AssociationElement> associationElements = loadAssociationElements(new File(dataDir, completeFilename));
60 // associationElements = cleanupSmallCounts(associationElements, 1, 1); 61 // associationElements = cleanupSmallCounts(associationElements, 1, 1);
61 - List<AssociationElement> annotableElements = removeFirstSmallCounts(associationElements, 1, 1); 62 + List<AssociationElement> annotableElements = removeFirstSmallCounts(associationElements);
62 List<Integer> annotateIndexes = chooseAnnotated(associationElements, annotableElements, 1, 1); 63 List<Integer> annotateIndexes = chooseAnnotated(associationElements, annotableElements, 1, 1);
63 writeSampleAndAnnotated(new File(dataDir, sampleFilename), new File(dataDir, annontatedFilename), annotateIndexes, associationElements, annotableElements); 64 writeSampleAndAnnotated(new File(dataDir, sampleFilename), new File(dataDir, annontatedFilename), annotateIndexes, associationElements, annotableElements);
64 } 65 }
...@@ -116,7 +117,7 @@ public class PreprocessingRunner implements ApplicationRunner { ...@@ -116,7 +117,7 @@ public class PreprocessingRunner implements ApplicationRunner {
116 return associationElements; 117 return associationElements;
117 } 118 }
118 119
119 - private List<AssociationElement> removeFirstSmallCounts(List<AssociationElement> associationElements, int userSize, int itemSize) { 120 + private List<AssociationElement> removeFirstSmallCounts(List<AssociationElement> associationElements) {
120 121
121 boolean removedUser; 122 boolean removedUser;
122 boolean removedItem; 123 boolean removedItem;
...@@ -132,7 +133,7 @@ public class PreprocessingRunner implements ApplicationRunner { ...@@ -132,7 +133,7 @@ public class PreprocessingRunner implements ApplicationRunner {
132 itemIdSet = associationElements.stream().map(element -> element.getItemId()).collect(Collectors.toSet()); 133 itemIdSet = associationElements.stream().map(element -> element.getItemId()).collect(Collectors.toSet());
133 for (Long itemId : itemIdSet) { 134 for (Long itemId : itemIdSet) {
134 userCount = associationElements.stream().filter(element -> element.getItemId() == itemId).count(); 135 userCount = associationElements.stream().filter(element -> element.getItemId() == itemId).count();
135 - if (userCount <= userSize) { 136 + if (userCount <= annotateThreshold) {
136 associationElements = associationElements.stream().filter(element -> element.getItemId() != itemId).collect(Collectors.toList()); 137 associationElements = associationElements.stream().filter(element -> element.getItemId() != itemId).collect(Collectors.toList());
137 if (!removedItem) { 138 if (!removedItem) {
138 removedItem = true; 139 removedItem = true;
...@@ -149,7 +150,7 @@ public class PreprocessingRunner implements ApplicationRunner { ...@@ -149,7 +150,7 @@ public class PreprocessingRunner implements ApplicationRunner {
149 userIdSet = associationElements.stream().map(element -> element.getUserId()).collect(Collectors.toSet()); 150 userIdSet = associationElements.stream().map(element -> element.getUserId()).collect(Collectors.toSet());
150 for (Long userId : userIdSet) { 151 for (Long userId : userIdSet) {
151 itemCount = associationElements.stream().filter(element -> element.getUserId() == userId).count(); 152 itemCount = associationElements.stream().filter(element -> element.getUserId() == userId).count();
152 - if (itemCount <= itemSize) { 153 + if (itemCount <= annotateThreshold) {
153 associationElements = associationElements.stream().filter(element -> element.getUserId() != userId).collect(Collectors.toList()); 154 associationElements = associationElements.stream().filter(element -> element.getUserId() != userId).collect(Collectors.toList());
154 if (!removedUser) { 155 if (!removedUser) {
155 removedUser = true; 156 removedUser = true;
...@@ -314,6 +315,11 @@ public class PreprocessingRunner implements ApplicationRunner { ...@@ -314,6 +315,11 @@ public class PreprocessingRunner implements ApplicationRunner {
314 annotatePercent = 1.0f; 315 annotatePercent = 1.0f;
315 } 316 }
316 317
318 + if (properties.containsKey("annotateThreshold")) {
319 + annotateThreshold = Integer.parseInt(properties.getProperty("annotateThreshold"));
320 + } else {
321 + annotateThreshold = 1;
322 + }
317 } catch (IOException e) { 323 } catch (IOException e) {
318 throw new PreprocessingException("Can't read parameters properties file " + dataDir + parametersFilename, e); 324 throw new PreprocessingException("Can't read parameters properties file " + dataDir + parametersFilename, e);
319 } 325 }
......