Jean-Francois Leveque

Computation of the variance and Rutily coefficients
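
In short (formulas as read from computeStatistics() below, not an authoritative definition; here n_i is the number of users item i was recommended to, N the total number of recommendations and |I| the number of distinct recommended items):

\bar{n} = \frac{N}{|I|}, \qquad
\sigma = \sqrt{\frac{1}{|I|} \sum_{i} (\bar{n} - n_i)^2}, \qquad
C_v = \frac{\sigma}{\bar{n}}, \qquad
VR = \frac{\sigma}{N}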

@@ -17,12 +17,15 @@ public class PostprocessingExpert {
Set<Long> recommendedItemIds;
Set<Long> sampleUserIds;
Map<Long, Set<Long>> sampleItemUserIds;
Map<Long, Integer> itemRecommandationUserCounts;
int recommendableItemCount;
int recommendedItemCount;
int recommendableItemUserCount;
int annotatedItemUserCount;
int recommendedItemUserCount;
int validRecommendationCount;
double itemRecommandationUserCountRelativeStdDeviation;
double coefficientRutily;
public PostprocessingExpert(List<PostprocessingSample> sampleList, List<PostprocessingSample> recommendationList,
List<PostprocessingSample> annotatedList) {
@@ -36,6 +39,10 @@ public class PostprocessingExpert {
analyzeRecommendations();
}
public PostprocessingStatistics getStatistics() {
return computeStatistics();
}
public PostprocessingCoverage getCoverage() {
return computeCoverage();
}
@@ -84,6 +91,7 @@ public class PostprocessingExpert {
recommendedItemUserCount = 0;
validRecommendationCount = 0;
recommendedItemIds = new HashSet<>();
itemRecommandationUserCounts = new HashMap<>();
for (PostprocessingSample annote : annotatedList) {
logger.trace("Annotated item {}, user {}", annote.getItemId(), annote.getUserId());
}
@@ -91,6 +99,13 @@ public class PostprocessingExpert {
for (PostprocessingSample reco : recommendationList) {
Long itemId = reco.getItemId();
Long userId = reco.getUserId();
// count how many users this item has been recommended to
itemRecommandationUserCounts.merge(itemId, 1, Integer::sum);
recommendedItemIds.add(itemId);
if (sampleItemUserIds.containsKey(itemId)) {
Set<Long> set = sampleItemUserIds.get(itemId);
@@ -111,6 +126,25 @@ public class PostprocessingExpert {
logger.debug("PR: Nombre de recommandations annotées {}", validRecommendationCount);
}
protected PostprocessingStatistics computeStatistics() {
float itemRecommandationMean = (float) recommendationList.size() / recommendedItemCount;
logger.debug("S: Nombre moyen de recommandations d'un objet {}", itemRecommandationMean);
double sumForVariance = 0;
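// accumulate the squared deviations of each item's recommendation count from the mean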
for (Map.Entry<Long, Integer> itemRecommandationUserCount : itemRecommandationUserCounts.entrySet()) {
sumForVariance += Math.pow(itemRecommandationMean - itemRecommandationUserCount.getValue(), 2.0);
}
double itemRecommandationUserCountStdDeviation = Math.sqrt(sumForVariance/recommendedItemCount);
itemRecommandationUserCountRelativeStdDeviation = itemRecommandationUserCountStdDeviation / itemRecommandationMean;
logger.debug("S: coefficient de variation {}", itemRecommandationUserCountRelativeStdDeviation);
coefficientRutily = itemRecommandationUserCountStdDeviation / recommendationList.size();
logger.debug("S: coefficient Rutily {}", coefficientRutily);
return new PostprocessingStatistics(itemRecommandationUserCountRelativeStdDeviation, coefficientRutily);
}
protected PostprocessingCoverage computeCoverage() {
float c1;
float c2;
......
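As a reading aid, here is a self-contained sketch of the two statistics computed by computeStatistics() above, applied to a plain map of per-item user counts; the class and method names are hypothetical and not part of the commit:

import java.util.Map;

public class RecommendationStatisticsSketch {
    /**
     * @param itemUserCounts       number of users each item was recommended to (itemRecommandationUserCounts above)
     * @param totalRecommendations recommendationList.size() above
     * @return {coefficient of variation (Cv), Rutily coefficient (VR)}
     */
    public static double[] computeCvAndRutily(Map<Long, Integer> itemUserCounts, long totalRecommendations) {
        int recommendedItemCount = itemUserCounts.size();
        double mean = (double) totalRecommendations / recommendedItemCount;
        double sumForVariance = 0;
        for (int count : itemUserCounts.values()) {
            sumForVariance += Math.pow(mean - count, 2.0);
        }
        double stdDeviation = Math.sqrt(sumForVariance / recommendedItemCount);
        return new double[] { stdDeviation / mean, stdDeviation / totalRecommendations };
    }
}

For example, three items recommended to 4, 4 and 1 users (N = 9) give a mean of 3, a standard deviation of about 1.414, Cv ≈ 0.471 and VR ≈ 0.157.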
@@ -45,6 +45,9 @@ public class PostprocessingRunner implements ApplicationRunner {
@Value("${precisionRecall.filename}")
private String precisionRecallFilename;
@Value("${statistics.filename}")
private String statisticsFilename;
private Logger logger = LoggerFactory.getLogger(getClass());
private String sampleFilename;
private String annotatedFilename;
@@ -62,9 +65,11 @@ public class PostprocessingRunner implements ApplicationRunner {
expert.analyze();
PostprocessingCoverage coverage = expert.getCoverage();
PostprocessingPrecisionRecall precisionRecall = expert.getPrecisionRecall();
PostprocessingStatistics statistics = expert.getStatistics();
writeCsvCoverage(coverage, dataDir, coverageFilename);
writeCsvPrecisionRecall(precisionRecall, dataDir, precisionRecallFilename);
writeCsvStatistics(statistics, dataDir, statisticsFilename);
}
private void writeCsvCoverage(PostprocessingCoverage coverage, String dataDir, String coverageFilename) throws PostprocessingException {
@@ -93,6 +98,19 @@ public class PostprocessingRunner implements ApplicationRunner {
}
private void writeCsvStatistics(PostprocessingStatistics statistics, String dataDir, String statisticsFilename) throws PostprocessingException {
// try-with-resources ensures the writer is closed even if printing fails
try (CSVPrinter csvPrinter = new CSVPrinter(new FileWriter(new File(dataDir, statisticsFilename)),
CSVFormat.TDF.withHeader("Cv", "VR"))) {
// one record: coefficient of variation (Cv) and Rutily coefficient (VR), in French-locale scientific notation
csvPrinter.printRecord(String.format(Locale.FRENCH, "%.3e", statistics.getRelativeStandardDeviation()),
String.format(Locale.FRENCH, "%.3e", statistics.getCoefficientRutily()));
} catch (IOException e) {
throw new PostprocessingException("Can't write statistics file " + dataDir + statisticsFilename, e);
}
}
/**
* read csv (TDF) file and map it to a list of PostprocessingSample
*
......
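For reference, CSVFormat.TDF is tab-delimited and %.3e with Locale.FRENCH uses a comma as decimal separator, so the generated Statistiques.csv should look roughly as follows (values taken from the small three-item example sketched earlier, shown only as an illustration):

Cv	VR
4,714e-01	1,571e-01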
package org.legrog.recommendation.postprocess;
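/**
 * Value object holding the coefficient of variation (Cv) of the per-item recommendation counts
 * and the Rutily coefficient (VR), both computed in PostprocessingExpert.computeStatistics().
 */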
public class PostprocessingStatistics {
private double relativeStandardDeviation;
private double coefficientRutily;
public PostprocessingStatistics(double relativeStandardDeviation, double coefficientRutily) {
this.relativeStandardDeviation = relativeStandardDeviation;
this.coefficientRutily = coefficientRutily;
}
public double getRelativeStandardDeviation() {
return relativeStandardDeviation;
}
public double getCoefficientRutily() {
return coefficientRutily;
}
}
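A minimal usage sketch for the class above (illustrative only; it assumes JUnit Jupiter is available on the test classpath, which is an assumption, and that the test lives in the same package):

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

class PostprocessingStatisticsSketchTest {
    @Test
    void gettersReturnConstructorArguments() {
        PostprocessingStatistics statistics = new PostprocessingStatistics(0.471, 0.157);
        assertEquals(0.471, statistics.getRelativeStandardDeviation(), 1e-12);
        assertEquals(0.157, statistics.getCoefficientRutily(), 1e-12);
    }
}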
@@ -6,4 +6,5 @@ ratingAnnotated.filename=${ratingAnnotated.filename}
recommandations.filename=${recommandations.filename}
coverage.filename=${coverage.filename}
precisionRecall.filename=${precisionRecall.filename}
statistics.filename=${statistics.filename}
data.dir=dumb/
\ No newline at end of file
......
@@ -32,6 +32,7 @@
<recommandations.filename>Recommandations.csv</recommandations.filename>
<coverage.filename>Couverture.txt</coverage.filename>
<precisionRecall.filename>PrecisionRappel.csv</precisionRecall.filename>
<statistics.filename>Statistiques.csv</statistics.filename>
</properties>
<build>
<resources>
......
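Note: the ${statistics.filename} placeholder in the application.properties excerpt above is presumably substituted at build time by Maven resource filtering (the <build><resources> section follows the <properties> block shown here); if so, the filtered line would read:

statistics.filename=Statistiques.csv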