JR Utily

refactor post processing

1 package org.legrog.recommendation.postprocess; 1 package org.legrog.recommendation.postprocess;
2 2
3 -import org.apache.commons.csv.CSVFormat;
4 -import org.apache.commons.csv.CSVRecord;
5 -import org.slf4j.Logger;
6 -import org.slf4j.LoggerFactory;
7 -import org.springframework.beans.factory.annotation.Value;
8 -import org.springframework.boot.CommandLineRunner;
9 import org.springframework.boot.SpringApplication; 3 import org.springframework.boot.SpringApplication;
10 -import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
11 import org.springframework.boot.autoconfigure.SpringBootApplication; 4 import org.springframework.boot.autoconfigure.SpringBootApplication;
12 -import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
13 -import org.springframework.boot.autoconfigure.orm.jpa.HibernateJpaAutoConfiguration;
14 -import org.springframework.context.annotation.Bean;
15 -
16 -import java.io.*;
17 -import java.util.*;
18 5
// Side-by-side diff of PostprocessingApplication. Unchanged lines carry the old line number and text followed by the
// new line number and text; lines marked "-" exist only in the old version and are removed by this change.
19 @SpringBootApplication 6 @SpringBootApplication
20 public class PostprocessingApplication { 7 public class PostprocessingApplication {
21 8
// Removed: the logger, the @Value-injected file settings and the mutable state (id sets, item-to-users map and
// counters) shared by the methods below.
22 - Logger logger = LoggerFactory.getLogger(getClass());
23 -
24 - @Value("${parameters.filename}")
25 - String parametersFilename;
26 - @Value("${data.dir}")
27 - String dataDir;
28 - @Value("${collectionSample.filename}")
29 - String collectionSampleFilename;
30 - @Value("${ratingSample.filename}")
31 - String ratingSampleFilename;
32 - @Value("${recommandations.filename}")
33 - String recommandationsFilename;
34 - String sampleFilename;
35 - Properties properties;
36 - Set<Long> sampleItemIds;
37 - Set<Long> recommendedItemIds;
38 - Set<Long> sampleUserIds;
39 - Map<Long, Set<Long>> sampleItemUserIds;
40 - int recommendableItemCount;
41 - int recommendedItemCount;
42 - int recommendableItemUserCount;
43 - int recommendedItemUserCount;
44 -
// Kept: the entry point, which hands PostprocessingApplication to SpringApplication.run.
45 public static void main(String[] args) { 9 public static void main(String[] args) {
46 SpringApplication.run(PostprocessingApplication.class, args); 10 SpringApplication.run(PostprocessingApplication.class, args);
47 } 11 }
48 -
// Removed: the CommandLineRunner bean; its lambda ignores the arguments and calls run().
49 - @Bean
50 - public CommandLineRunner postprocess() {
51 - return (args) -> this.run();
52 - }
53 -
// Removed: run() called the five steps below in a fixed order: parameters, sample file choice, sample analysis,
// recommendation analysis, coverage.
54 - public void run() {
55 - loadParametersProperties();
56 - loadSampleFilename();
57 - analyzeSample();
58 - analyzeRecommendations();
59 - computeCoverage();
60 -
61 - }
62 -
// Removed: computeCoverage() derived c1 and c2 as float ratios (recommended over recommendable items, then
// recommended over recommendable item-user pairs), set c3 to recommendedItemCount, and only wrote them to the trace log.
63 - void computeCoverage() {
64 - float c1;
65 - float c2;
66 - int c3;
67 -
68 - logger.trace("Nombre d'objets recommandés {}", recommendedItemCount);
69 - logger.trace("Nombre d'objets recommandables {}", recommendableItemCount);
70 - c1 = (float) recommendedItemCount / recommendableItemCount;
71 - logger.trace("c1 {}", String.format("%.3f", c1));
72 - logger.trace("Nombre de couples item-user recommandés {}", recommendedItemUserCount);
73 - logger.trace("Nombre de couples item-user recommandables {}", recommendableItemUserCount);
74 - c2 = (float) recommendedItemUserCount / recommendableItemUserCount;
75 - logger.trace("c2 {}", String.format("%.3f", c2));
76 - c3 = recommendedItemCount;
77 - logger.trace("c3 {}", c3);
78 - }
79 -
// Removed: analyzeRecommendations() parsed the TDF (tab-delimited) file at dataDir + recommandationsFilename,
// collected the distinct item ids, and counted the rows whose item is in sampleItemUserIds while the user is not in
// that item's set. An IOException was logged and not rethrown; the reader was never closed.
80 - void analyzeRecommendations() {
81 - Reader in = null;
82 -
83 - try {
84 - recommendedItemUserCount = 0;
85 - recommendedItemIds = new HashSet<>();
86 - in = new InputStreamReader(new FileInputStream(dataDir + recommandationsFilename));
87 - Iterable<CSVRecord> records = CSVFormat.TDF.withFirstRecordAsHeader().parse(in);
88 - for (CSVRecord record : records) {
89 - Long itemId = Long.parseLong(record.get("itemId"));
90 - Long userId = Long.parseLong(record.get("userId"));
91 - recommendedItemIds.add(itemId);
92 - if (sampleItemUserIds.containsKey(itemId)) {
93 - Set<Long> set = sampleItemUserIds.get(itemId);
94 - if (!set.contains(userId)) {
95 - recommendedItemUserCount++;
96 - }
97 - }
98 - }
99 - recommendedItemCount = recommendedItemIds.size();
100 - logger.trace("Nombre d'objets recommandés {}", recommendedItemCount);
101 - logger.trace("Nombre de couples item-user recommandés {}", recommendedItemUserCount);
102 - } catch (IOException e) {
103 - logger.error("analyzeRecommendations IOException : {}", e.getStackTrace());
104 - }
105 -
106 - }
107 -
// Removed: analyzeSample() parsed the TDF (tab-delimited) file at dataDir + sampleFilename, filled the item id set,
// the user id set and the item-to-users map, then set recommendableItemUserCount to (items x users) minus the number
// of item-user pairs present in the sample.
108 - void analyzeSample() {
109 -
110 - Reader in = null;
111 - try {
112 - sampleItemIds = new HashSet<>();
113 - sampleUserIds = new HashSet<>();
114 - sampleItemUserIds = new HashMap<>();
115 - in = new InputStreamReader(new FileInputStream(dataDir + sampleFilename));
116 - Iterable<CSVRecord> records = CSVFormat.TDF.withFirstRecordAsHeader().parse(in);
117 - for (CSVRecord record : records) {
118 - Long itemId = Long.parseLong(record.get("itemId"));
119 - Long userId = Long.parseLong(record.get("userId"));
120 - sampleItemIds.add(itemId);
121 - sampleUserIds.add(userId);
122 - if (!sampleItemUserIds.containsKey(itemId)) {
123 - Set<Long> set = new HashSet<>();
124 - set.add(userId);
125 - sampleItemUserIds.put(itemId, set);
126 - } else {
127 - Set<Long> set = sampleItemUserIds.get(itemId);
128 - set.add(userId);
129 - sampleItemUserIds.put(itemId, set);
130 - }
131 - }
132 - recommendableItemCount = sampleItemIds.size();
133 - logger.trace("Nombre d'objets recommandables {}", recommendableItemCount);
134 - logger.trace("Taille de la matrice item-user {}", sampleItemIds.size() * sampleUserIds.size());
135 -
136 - int sampleCoupleCount = 0;
137 - for (Long itemId : sampleItemIds) {
138 - sampleCoupleCount += sampleItemUserIds.get(itemId).size();
139 - }
140 -
141 - recommendableItemUserCount = sampleItemIds.size() * sampleUserIds.size() - sampleCoupleCount;
142 - logger.trace("Nombre de couples item-user dans l'échantillon {}", sampleCoupleCount);
143 - logger.trace("Nombre de couples item-user recommandables {}", recommendableItemUserCount);
144 -
145 - } catch (IOException e) {
146 - logger.error("analyzeSample IOException : {}", e.getStackTrace());
147 - }
148 -
149 - }
150 -
// Removed: loadSampleFilename() returned without setting sampleFilename when the "ratings" property was absent;
// otherwise it picked the rating sample when the value parsed as true and the collection sample when it did not.
151 - void loadSampleFilename() {
152 - if (!properties.containsKey("ratings")) {
153 - return;
154 - } else {
155 - logger.trace("ratings {}", properties.getProperty("ratings"));
156 - if (Boolean.parseBoolean(properties.getProperty("ratings"))) {
157 - sampleFilename = ratingSampleFilename;
158 - } else {
159 - sampleFilename = collectionSampleFilename;
160 - }
161 -
162 - logger.trace("sampleFilename {}", sampleFilename);
163 - }
164 - }
165 -
// Removed: loadParametersProperties() loaded dataDir + parametersFilename into a Properties object; the stream was
// closed only when load succeeded, and an IOException was logged and not rethrown.
166 - void loadParametersProperties() {
167 -
168 - Properties properties = new Properties();
169 - InputStream in = null;
170 - try {
171 - in = new FileInputStream(dataDir + parametersFilename);
172 - properties.load(in);
173 - in.close();
174 - } catch (IOException e) {
175 - logger.error("loadParametersProperties IOException : {}", e.getStackTrace());
176 - }
177 -
178 - this.properties = properties;
179 - }
180 -
181 } 12 }
......
1 +package org.legrog.recommendation.postprocess;
2 +
3 +
/**
 * Immutable holder for the three coverage indicators of a post-processing run.
 *
 * <p>The class stores the values as given; it performs no validation or computation.
 */
public class PostprocessingCoverage {

    /** First coverage indicator. */
    private final float c1;

    /** Second coverage indicator. */
    private final float c2;

    /** Third coverage indicator. */
    private final int c3;

    /**
     * Creates a coverage result from already computed indicators.
     *
     * @param c1 first coverage indicator
     * @param c2 second coverage indicator
     * @param c3 third coverage indicator
     */
    public PostprocessingCoverage(float c1, float c2, int c3) {
        this.c1 = c1;
        this.c2 = c2;
        this.c3 = c3;
    }

    /**
     * @return the first coverage indicator, exactly as passed to the constructor
     */
    public float getC1() {
        return c1;
    }

    /**
     * @return the second coverage indicator, exactly as passed to the constructor
     */
    public float getC2() {
        return c2;
    }

    /**
     * @return the third coverage indicator, exactly as passed to the constructor
     */
    public int getC3() {
        return c3;
    }
}
1 +package org.legrog.recommendation.postprocess;
2 +
3 +
4 +import org.slf4j.Logger;
5 +import org.slf4j.LoggerFactory;
6 +
7 +import java.util.*;
8 +
9 +public class PostprocessingExpert {
10 +
11 + List<PostprocessingSample> sampleList;
12 + List<PostprocessingSample> recommendationList;
13 + Logger logger = LoggerFactory.getLogger(getClass());
14 +
15 + Set<Long> sampleItemIds;
16 + Set<Long> recommendedItemIds;
17 + Set<Long> sampleUserIds;
18 + Map<Long, Set<Long>> sampleItemUserIds;
19 + int recommendableItemCount;
20 + int recommendedItemCount;
21 + int recommendableItemUserCount;
22 + int recommendedItemUserCount;
23 +
24 + public PostprocessingExpert(List<PostprocessingSample> sampleList, List<PostprocessingSample> recommendationList) {
25 + this.sampleList = sampleList;
26 + this.recommendationList = recommendationList;
27 + }
28 +
29 + public PostprocessingCoverage getCoverage() {
30 + analyzeSample();
31 + analyzeRecommendations();
32 + return computeCoverage();
33 + }
34 +
35 + protected void analyzeSample() {
36 +
37 + sampleItemIds = new HashSet<>();
38 + sampleUserIds = new HashSet<>();
39 + sampleItemUserIds = new HashMap<>();
40 +
41 + for (PostprocessingSample sample : sampleList) {
42 + Long itemId = sample.getItemId();
43 + Long userId = sample.getUserId();
44 + sampleItemIds.add(itemId);
45 + sampleUserIds.add(userId);
46 + if (!sampleItemUserIds.containsKey(itemId)) {
47 + Set<Long> set = new HashSet<>();
48 + set.add(userId);
49 + sampleItemUserIds.put(itemId, set);
50 + } else {
51 + Set<Long> set = sampleItemUserIds.get(itemId);
52 + set.add(userId);
53 + sampleItemUserIds.put(itemId, set);
54 + }
55 + }
56 +
57 + recommendableItemCount = sampleItemIds.size();
58 + logger.trace("Nombre d'objets recommandables {}", recommendableItemCount);
59 + logger.trace("Taille de la matrice item-user {}", sampleItemIds.size() * sampleUserIds.size());
60 +
61 + int sampleCoupleCount = 0;
62 + for (Long itemId : sampleItemIds) {
63 + sampleCoupleCount += sampleItemUserIds.get(itemId).size();
64 + }
65 +
66 + recommendableItemUserCount = sampleItemIds.size() * sampleUserIds.size() - sampleCoupleCount;
67 + logger.trace("Nombre de couples item-user dans l'échantillon {}", sampleCoupleCount);
68 + logger.trace("Nombre de couples item-user recommandables {}", recommendableItemUserCount);
69 + }
70 +
71 + protected void analyzeRecommendations() {
72 + recommendedItemUserCount = 0;
73 + recommendedItemIds = new HashSet<>();
74 + for (PostprocessingSample reco : recommendationList) {
75 + Long itemId = reco.getItemId();
76 + Long userId = reco.getUserId();
77 + recommendedItemIds.add(itemId);
78 + if (sampleItemUserIds.containsKey(itemId)) {
79 + Set<Long> set = sampleItemUserIds.get(itemId);
80 + if (!set.contains(userId)) {
81 + recommendedItemUserCount++;
82 + }
83 + }
84 + }
85 + recommendedItemCount = recommendedItemIds.size();
86 + logger.trace("Nombre d'objets recommandés {}", recommendedItemCount);
87 + logger.trace("Nombre de couples item-user recommandés {}", recommendedItemUserCount);
88 + }
89 +
90 + protected PostprocessingCoverage computeCoverage() {
91 + float c1;
92 + float c2;
93 + int c3;
94 +
95 + logger.trace("Nombre d'objets recommandés {}", recommendedItemCount);
96 + logger.trace("Nombre d'objets recommandables {}", recommendableItemCount);
97 + c1 = (float) recommendedItemCount / recommendableItemCount;
98 + logger.trace("c1 {}", String.format("%.3f", c1));
99 + logger.trace("Nombre de couples item-user recommandés {}", recommendedItemUserCount);
100 + logger.trace("Nombre de couples item-user recommandables {}", recommendableItemUserCount);
101 + c2 = (float) recommendedItemUserCount / recommendableItemUserCount;
102 + logger.trace("c2 {}", String.format("%.3f", c2));
103 + c3 = recommendedItemCount;
104 + logger.trace("c3 {}", c3);
105 +
106 + return new PostprocessingCoverage(c1,c2, c3);
107 + }
108 +
109 +
110 +
111 +
112 +
113 +}
1 +package org.legrog.recommendation.postprocess;
2 +
3 +import org.apache.commons.csv.CSVFormat;
4 +import org.apache.commons.csv.CSVRecord;
5 +import org.slf4j.Logger;
6 +import org.slf4j.LoggerFactory;
7 +import org.springframework.beans.factory.annotation.Value;
8 +import org.springframework.boot.ApplicationArguments;
9 +import org.springframework.boot.ApplicationRunner;
10 +import org.springframework.stereotype.Component;
11 +
12 +import java.io.*;
13 +import java.util.List;
14 +import java.util.Properties;
15 +import java.util.stream.Collectors;
16 +import java.util.stream.StreamSupport;
17 +
18 +@Component
19 +public class PostprocessingRunner implements ApplicationRunner {
20 +
21 + @Value("${parameters.filename}")
22 + private String parametersFilename;
23 +
24 + @Value("${data.dir}")
25 + private String dataDir;
26 +
27 + @Value("${collectionSample.filename}")
28 + private String collectionSampleFilename;
29 +
30 + @Value("${ratingSample.filename}")
31 + private String ratingSampleFilename;
32 +
33 + @Value("${recommandations.filename}")
34 + private String recommandationsFilename;
35 +
36 + private Logger logger = LoggerFactory.getLogger(getClass());
37 + private String sampleFilename;
38 +
39 +
40 + @Override
41 + public void run(ApplicationArguments args) throws Exception {
42 +
43 + loadSampleFilename();
44 + List<PostprocessingSample> samples = loadCsvSample(new File(dataDir, sampleFilename));
45 + List<PostprocessingSample> recommendations = loadCsvSample(new File(dataDir, recommandationsFilename));
46 +
47 + PostprocessingExpert expert = new PostprocessingExpert(samples, recommendations);
48 + PostprocessingCoverage coverage = expert.getCoverage();
49 +
50 + //todo write coverage in a file to be read by user
51 + //...
52 + }
53 +
54 + /**
55 + * read csv (TDF) file and map it to a list of PostprocessingSample
56 + *
57 + * @param file
58 + * @return
59 + * @throws PostprocessingException
60 + */
61 + private List<PostprocessingSample> loadCsvSample(File file) throws PostprocessingException {
62 + try (Reader in = new InputStreamReader(new FileInputStream(file))) {
63 + Iterable<CSVRecord> records = CSVFormat.TDF.withFirstRecordAsHeader().parse(in);
64 +
65 + return StreamSupport.stream(records.spliterator(), false)
66 + .map((record) -> new PostprocessingSample(
67 + Long.parseLong(record.get("userId")),
68 + Long.parseLong(record.get("itemId")))
69 + )
70 + .collect(Collectors.toList());
71 +
72 + } catch (IOException e) {
73 + throw new PostprocessingException("Can't read CSV file " + sampleFilename, e);
74 + }
75 +
76 + }
77 +
78 + /**
79 + * read properties file from application.properties parameter.fileName then search for rating property inside
80 + * depending of which, select rating or collection file as the sample file
81 + *
82 + * todo replace this by a command line switch ?
83 + *
84 + * @throws PostprocessingException
85 + */
86 + private void loadSampleFilename() throws PostprocessingException {
87 + try (InputStream in = new FileInputStream(new File(dataDir, parametersFilename))) {
88 + Properties properties = new Properties();
89 + properties.load(in);
90 + if (properties.containsKey("ratings")) {
91 + logger.trace("ratings {}", properties.getProperty("ratings"));
92 + if (Boolean.parseBoolean(properties.getProperty("ratings"))) {
93 + sampleFilename = ratingSampleFilename;
94 + } else {
95 + sampleFilename = collectionSampleFilename;
96 + }
97 + } else {
98 + // by default, takes collection
99 + sampleFilename = collectionSampleFilename;
100 + }
101 + } catch (IOException e) {
102 + throw new PostprocessingException("Can't read properties file " + parametersFilename, e);
103 + }
104 + }
105 +
106 +
107 + private class PostprocessingException extends Exception {
108 + public PostprocessingException() {
109 + super();
110 + }
111 +
112 + public PostprocessingException(String message) {
113 + super(message);
114 + }
115 +
116 + public PostprocessingException(String message, Throwable cause) {
117 + super(message, cause);
118 + }
119 +
120 + public PostprocessingException(Throwable cause) {
121 + super(cause);
122 + }
123 +
124 + protected PostprocessingException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
125 + super(message, cause, enableSuppression, writableStackTrace);
126 + }
127 + }
128 +}
1 +package org.legrog.recommendation.postprocess;
2 +
/**
 * Immutable (user, item) pair, used both for sample rows and for recommendation rows.
 *
 * <p>The ids are stored as given; {@code null} values are not rejected.
 */
public class PostprocessingSample {
    private final Long userId;
    private final Long itemId;

    /**
     * @param userId id of the user
     * @param itemId id of the item
     */
    public PostprocessingSample(Long userId, Long itemId) {
        this.userId = userId;
        this.itemId = itemId;
    }

    /**
     * @return the user id, exactly as passed to the constructor
     */
    public Long getUserId() {
        return userId;
    }

    /**
     * @return the item id, exactly as passed to the constructor
     */
    public Long getItemId() {
        return itemId;
    }
}