Showing 1 changed file with 10 additions and 4 deletions
... | @@ -51,6 +51,7 @@ public class PreprocessingRunner implements ApplicationRunner { | ... | @@ -51,6 +51,7 @@ public class PreprocessingRunner implements ApplicationRunner { |
51 | private Boolean ratings; | 51 | private Boolean ratings; |
52 | 52 | ||
53 | private float annotatePercent; | 53 | private float annotatePercent; |
54 | + private int annotateThreshold; | ||
54 | 55 | ||
55 | @Override | 56 | @Override |
56 | public void run(ApplicationArguments applicationArguments) throws Exception { | 57 | public void run(ApplicationArguments applicationArguments) throws Exception { |
... | @@ -58,7 +59,7 @@ public class PreprocessingRunner implements ApplicationRunner { | ... | @@ -58,7 +59,7 @@ public class PreprocessingRunner implements ApplicationRunner { |
58 | setFilenames(); | 59 | setFilenames(); |
59 | List<AssociationElement> associationElements = loadAssociationElements(new File(dataDir, completeFilename)); | 60 | List<AssociationElement> associationElements = loadAssociationElements(new File(dataDir, completeFilename)); |
60 | // associationElements = cleanupSmallCounts(associationElements, 1, 1); | 61 | // associationElements = cleanupSmallCounts(associationElements, 1, 1); |
61 | - List<AssociationElement> annotableElements = removeFirstSmallCounts(associationElements, 1, 1); | 62 | + List<AssociationElement> annotableElements = removeFirstSmallCounts(associationElements); |
62 | List<Integer> annotateIndexes = chooseAnnotated(associationElements, annotableElements, 1, 1); | 63 | List<Integer> annotateIndexes = chooseAnnotated(associationElements, annotableElements, 1, 1); |
63 | writeSampleAndAnnotated(new File(dataDir, sampleFilename), new File(dataDir, annontatedFilename), annotateIndexes, associationElements, annotableElements); | 64 | writeSampleAndAnnotated(new File(dataDir, sampleFilename), new File(dataDir, annontatedFilename), annotateIndexes, associationElements, annotableElements); |
64 | } | 65 | } |
... | @@ -116,7 +117,7 @@ public class PreprocessingRunner implements ApplicationRunner { | ... | @@ -116,7 +117,7 @@ public class PreprocessingRunner implements ApplicationRunner { |
116 | return associationElements; | 117 | return associationElements; |
117 | } | 118 | } |
118 | 119 | ||
119 | - private List<AssociationElement> removeFirstSmallCounts(List<AssociationElement> associationElements, int userSize, int itemSize) { | 120 | + private List<AssociationElement> removeFirstSmallCounts(List<AssociationElement> associationElements) { |
120 | 121 | ||
121 | boolean removedUser; | 122 | boolean removedUser; |
122 | boolean removedItem; | 123 | boolean removedItem; |
... | @@ -132,7 +133,7 @@ public class PreprocessingRunner implements ApplicationRunner { | ... | @@ -132,7 +133,7 @@ public class PreprocessingRunner implements ApplicationRunner { |
132 | itemIdSet = associationElements.stream().map(element -> element.getItemId()).collect(Collectors.toSet()); | 133 | itemIdSet = associationElements.stream().map(element -> element.getItemId()).collect(Collectors.toSet()); |
133 | for (Long itemId : itemIdSet) { | 134 | for (Long itemId : itemIdSet) { |
134 | userCount = associationElements.stream().filter(element -> element.getItemId() == itemId).count(); | 135 | userCount = associationElements.stream().filter(element -> element.getItemId() == itemId).count(); |
135 | - if (userCount <= userSize) { | 136 | + if (userCount <= annotateThreshold) { |
136 | associationElements = associationElements.stream().filter(element -> element.getItemId() != itemId).collect(Collectors.toList()); | 137 | associationElements = associationElements.stream().filter(element -> element.getItemId() != itemId).collect(Collectors.toList()); |
137 | if (!removedItem) { | 138 | if (!removedItem) { |
138 | removedItem = true; | 139 | removedItem = true; |
... | @@ -149,7 +150,7 @@ public class PreprocessingRunner implements ApplicationRunner { | ... | @@ -149,7 +150,7 @@ public class PreprocessingRunner implements ApplicationRunner { |
149 | userIdSet = associationElements.stream().map(element -> element.getUserId()).collect(Collectors.toSet()); | 150 | userIdSet = associationElements.stream().map(element -> element.getUserId()).collect(Collectors.toSet()); |
150 | for (Long userId : userIdSet) { | 151 | for (Long userId : userIdSet) { |
151 | itemCount = associationElements.stream().filter(element -> element.getUserId() == userId).count(); | 152 | itemCount = associationElements.stream().filter(element -> element.getUserId() == userId).count(); |
152 | - if (itemCount <= itemSize) { | 153 | + if (itemCount <= annotateThreshold) { |
153 | associationElements = associationElements.stream().filter(element -> element.getUserId() != userId).collect(Collectors.toList()); | 154 | associationElements = associationElements.stream().filter(element -> element.getUserId() != userId).collect(Collectors.toList()); |
154 | if (!removedUser) { | 155 | if (!removedUser) { |
155 | removedUser = true; | 156 | removedUser = true; |
... | @@ -314,6 +315,11 @@ public class PreprocessingRunner implements ApplicationRunner { | ... | @@ -314,6 +315,11 @@ public class PreprocessingRunner implements ApplicationRunner { |
314 | annotatePercent = 1.0f; | 315 | annotatePercent = 1.0f; |
315 | } | 316 | } |
316 | 317 | ||
318 | + if (properties.containsKey("annotateThreshold")) { | ||
319 | + annotateThreshold = Integer.parseInt(properties.getProperty("annotateThreshold")); | ||
320 | + } else { | ||
321 | + annotateThreshold = 1; | ||
322 | + } | ||
317 | } catch (IOException e) { | 323 | } catch (IOException e) { |
318 | throw new PreprocessingException("Can't read parameters properties file " + dataDir + parametersFilename, e); | 324 | throw new PreprocessingException("Can't read parameters properties file " + dataDir + parametersFilename, e); |
319 | } | 325 | } | ... | ... |
-
Please register or log in to post a comment