Jean-Francois Leveque

Calcul des coefficients de variation et de Rutily

...@@ -17,12 +17,15 @@ public class PostprocessingExpert { ...@@ -17,12 +17,15 @@ public class PostprocessingExpert {
17 Set<Long> recommendedItemIds; 17 Set<Long> recommendedItemIds;
18 Set<Long> sampleUserIds; 18 Set<Long> sampleUserIds;
19 Map<Long, Set<Long>> sampleItemUserIds; 19 Map<Long, Set<Long>> sampleItemUserIds;
20 + Map<Long, Integer> itemRecommandationUserCounts;
20 int recommendableItemCount; 21 int recommendableItemCount;
21 int recommendedItemCount; 22 int recommendedItemCount;
22 int recommendableItemUserCount; 23 int recommendableItemUserCount;
23 int annotatedItemUserCount; 24 int annotatedItemUserCount;
24 int recommendedItemUserCount; 25 int recommendedItemUserCount;
25 int validRecommendationCount; 26 int validRecommendationCount;
27 + double itemRecommandationUserCountRelativeStdDeviation;
28 + double coefficientRutily;
26 29
27 public PostprocessingExpert(List<PostprocessingSample> sampleList, List<PostprocessingSample> recommendationList, 30 public PostprocessingExpert(List<PostprocessingSample> sampleList, List<PostprocessingSample> recommendationList,
28 List<PostprocessingSample> annotatedList) { 31 List<PostprocessingSample> annotatedList) {
...@@ -36,6 +39,10 @@ public class PostprocessingExpert { ...@@ -36,6 +39,10 @@ public class PostprocessingExpert {
36 analyzeRecommendations(); 39 analyzeRecommendations();
37 } 40 }
38 41
42 + public PostprocessingStatistics getStatistics() {
43 + return computeStatistics();
44 + }
45 +
39 public PostprocessingCoverage getCoverage() { 46 public PostprocessingCoverage getCoverage() {
40 return computeCoverage(); 47 return computeCoverage();
41 } 48 }
...@@ -84,6 +91,7 @@ public class PostprocessingExpert { ...@@ -84,6 +91,7 @@ public class PostprocessingExpert {
84 recommendedItemUserCount = 0; 91 recommendedItemUserCount = 0;
85 validRecommendationCount = 0; 92 validRecommendationCount = 0;
86 recommendedItemIds = new HashSet<>(); 93 recommendedItemIds = new HashSet<>();
94 + itemRecommandationUserCounts = new HashMap<>();
87 for (PostprocessingSample annote : annotatedList) { 95 for (PostprocessingSample annote : annotatedList) {
88 logger.trace("Annotated item {}, user {}", annote.getItemId(), annote.getUserId()); 96 logger.trace("Annotated item {}, user {}", annote.getItemId(), annote.getUserId());
89 } 97 }
...@@ -91,6 +99,13 @@ public class PostprocessingExpert { ...@@ -91,6 +99,13 @@ public class PostprocessingExpert {
91 for (PostprocessingSample reco : recommendationList) { 99 for (PostprocessingSample reco : recommendationList) {
92 Long itemId = reco.getItemId(); 100 Long itemId = reco.getItemId();
93 Long userId = reco.getUserId(); 101 Long userId = reco.getUserId();
102 +
103 + if(itemRecommandationUserCounts.containsKey(itemId)) {
104 + itemRecommandationUserCounts.put(itemId, new Integer(itemRecommandationUserCounts.get(itemId).intValue()+1));
105 + } else {
106 + itemRecommandationUserCounts.put(itemId, new Integer(1));
107 + }
108 +
94 recommendedItemIds.add(itemId); 109 recommendedItemIds.add(itemId);
95 if (sampleItemUserIds.containsKey(itemId)) { 110 if (sampleItemUserIds.containsKey(itemId)) {
96 Set<Long> set = sampleItemUserIds.get(itemId); 111 Set<Long> set = sampleItemUserIds.get(itemId);
...@@ -111,6 +126,25 @@ public class PostprocessingExpert { ...@@ -111,6 +126,25 @@ public class PostprocessingExpert {
111 logger.debug("PR: Nombre de recommandations annotées {}", validRecommendationCount); 126 logger.debug("PR: Nombre de recommandations annotées {}", validRecommendationCount);
112 } 127 }
113 128
129 + protected PostprocessingStatistics computeStatistics() {
130 + float itemRecommandationMean = 0;
131 +
132 + itemRecommandationMean = (float) recommendationList.size() / recommendedItemCount;
133 + logger.debug("S: Nombre moyen de recommandations d'un objet {}", itemRecommandationMean);
134 +
135 + double sumForVariance = 0;
136 + for (Map.Entry<Long, Integer> itemRecommandationUserCount : itemRecommandationUserCounts.entrySet()) {
137 + sumForVariance += Math.pow(itemRecommandationMean - itemRecommandationUserCount.getValue(), 2.0);
138 + }
139 + double itemRecommandationUserCountStdDeviation = Math.sqrt(sumForVariance/recommendedItemCount);
140 + itemRecommandationUserCountRelativeStdDeviation = itemRecommandationUserCountStdDeviation / itemRecommandationMean;
141 + logger.debug("S: coefficient de variation {}", itemRecommandationUserCountRelativeStdDeviation);
142 + coefficientRutily = itemRecommandationUserCountStdDeviation / recommendationList.size();
143 + logger.debug("S: coefficient Rutily {}", coefficientRutily);
144 +
145 + return new PostprocessingStatistics(itemRecommandationUserCountRelativeStdDeviation, coefficientRutily);
146 + }
147 +
114 protected PostprocessingCoverage computeCoverage() { 148 protected PostprocessingCoverage computeCoverage() {
115 float c1; 149 float c1;
116 float c2; 150 float c2;
......
...@@ -45,6 +45,9 @@ public class PostprocessingRunner implements ApplicationRunner { ...@@ -45,6 +45,9 @@ public class PostprocessingRunner implements ApplicationRunner {
45 @Value("${precisionRecall.filename}") 45 @Value("${precisionRecall.filename}")
46 private String precisionRecallFilename; 46 private String precisionRecallFilename;
47 47
48 + @Value("${statistics.filename}")
49 + private String statisticsFilename;
50 +
48 private Logger logger = LoggerFactory.getLogger(getClass()); 51 private Logger logger = LoggerFactory.getLogger(getClass());
49 private String sampleFilename; 52 private String sampleFilename;
50 private String annotatedFilename; 53 private String annotatedFilename;
...@@ -62,9 +65,11 @@ public class PostprocessingRunner implements ApplicationRunner { ...@@ -62,9 +65,11 @@ public class PostprocessingRunner implements ApplicationRunner {
62 expert.analyze(); 65 expert.analyze();
63 PostprocessingCoverage coverage = expert.getCoverage(); 66 PostprocessingCoverage coverage = expert.getCoverage();
64 PostprocessingPrecisionRecall precisionRecall = expert.getPrecisionRecall(); 67 PostprocessingPrecisionRecall precisionRecall = expert.getPrecisionRecall();
68 + PostprocessingStatistics statistics = expert.getStatistics();
65 69
66 writeCsvCoverage(coverage, dataDir, coverageFilename); 70 writeCsvCoverage(coverage, dataDir, coverageFilename);
67 writeCsvPrecisionRecall(precisionRecall, dataDir, precisionRecallFilename); 71 writeCsvPrecisionRecall(precisionRecall, dataDir, precisionRecallFilename);
72 + writeCsvStatistics(statistics, dataDir, statisticsFilename);
68 } 73 }
69 74
70 private void writeCsvCoverage(PostprocessingCoverage coverage, String dataDir, String coverageFilename) throws PostprocessingException { 75 private void writeCsvCoverage(PostprocessingCoverage coverage, String dataDir, String coverageFilename) throws PostprocessingException {
...@@ -93,6 +98,19 @@ public class PostprocessingRunner implements ApplicationRunner { ...@@ -93,6 +98,19 @@ public class PostprocessingRunner implements ApplicationRunner {
93 98
94 } 99 }
95 100
101 + private void writeCsvStatistics(PostprocessingStatistics statistics, String dataDir, String statisticsFilename) throws PostprocessingException {
102 + try {
103 + CSVPrinter csvPrinter = new CSVPrinter(new FileWriter(new File(dataDir, statisticsFilename)),
104 + CSVFormat.TDF.withHeader("Cv", "VR"));
105 + csvPrinter.printRecord(String.format(Locale.FRENCH, "%.3e", statistics.getRelativeStandardDeviation()),
106 + String.format(Locale.FRENCH, "%.3e", statistics.getCoefficientRutily()));
107 + csvPrinter.close();
108 + } catch (IOException e) {
109 + throw new PostprocessingException("Can't write statistics file " + dataDir + statisticsFilename, e);
110 + }
111 +
112 + }
113 +
96 /** 114 /**
97 * read csv (TDF) file and map it to a list of PostprocessingSample 115 * read csv (TDF) file and map it to a list of PostprocessingSample
98 * 116 *
......
1 +package org.legrog.recommendation.postprocess;
2 +
/**
 * Immutable holder for two statistics: a relative standard deviation
 * and a Rutily coefficient.
 */
public class PostprocessingStatistics {
    /** Relative standard deviation, as supplied to the constructor. */
    private final double relativeStandardDeviation;
    /** Rutily coefficient, as supplied to the constructor. */
    private final double coefficientRutily;

    /**
     * Creates a statistics holder; the values are stored as given, without validation.
     *
     * @param relativeStandardDeviation relative standard deviation to expose
     * @param coefficientRutily         Rutily coefficient to expose
     */
    public PostprocessingStatistics(double relativeStandardDeviation, double coefficientRutily) {
        this.relativeStandardDeviation = relativeStandardDeviation;
        this.coefficientRutily = coefficientRutily;
    }

    /**
     * @return the relative standard deviation given at construction
     */
    public double getRelativeStandardDeviation() {
        return relativeStandardDeviation;
    }

    /**
     * @return the Rutily coefficient given at construction
     */
    public double getCoefficientRutily() {
        return coefficientRutily;
    }
}
...@@ -6,4 +6,5 @@ ratingAnnotated.filename=${ratingAnnotated.filename} ...@@ -6,4 +6,5 @@ ratingAnnotated.filename=${ratingAnnotated.filename}
6 recommandations.filename=${recommandations.filename} 6 recommandations.filename=${recommandations.filename}
7 coverage.filename=${coverage.filename} 7 coverage.filename=${coverage.filename}
8 precisionRecall.filename=${precisionRecall.filename} 8 precisionRecall.filename=${precisionRecall.filename}
9 +statistics.filename=${statistics.filename}
9 data.dir=dumb/ 10 data.dir=dumb/
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
32 <recommandations.filename>Recommandations.csv</recommandations.filename> 32 <recommandations.filename>Recommandations.csv</recommandations.filename>
33 <coverage.filename>Couverture.txt</coverage.filename> 33 <coverage.filename>Couverture.txt</coverage.filename>
34 <precisionRecall.filename>PrecisionRappel.csv</precisionRecall.filename> 34 <precisionRecall.filename>PrecisionRappel.csv</precisionRecall.filename>
35 + <statistics.filename>Statistiques.csv</statistics.filename>
35 </properties> 36 </properties>
36 <build> 37 <build>
37 <resources> 38 <resources>
......