Showing
5 changed files
with
287 additions
and
169 deletions
1 | package org.legrog.recommendation.postprocess; | 1 | package org.legrog.recommendation.postprocess; |
2 | 2 | ||
3 | -import org.apache.commons.csv.CSVFormat; | ||
4 | -import org.apache.commons.csv.CSVRecord; | ||
5 | -import org.slf4j.Logger; | ||
6 | -import org.slf4j.LoggerFactory; | ||
7 | -import org.springframework.beans.factory.annotation.Value; | ||
8 | -import org.springframework.boot.CommandLineRunner; | ||
9 | import org.springframework.boot.SpringApplication; | 3 | import org.springframework.boot.SpringApplication; |
10 | -import org.springframework.boot.autoconfigure.EnableAutoConfiguration; | ||
11 | import org.springframework.boot.autoconfigure.SpringBootApplication; | 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; |
12 | -import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; | ||
13 | -import org.springframework.boot.autoconfigure.orm.jpa.HibernateJpaAutoConfiguration; | ||
14 | -import org.springframework.context.annotation.Bean; | ||
15 | - | ||
16 | -import java.io.*; | ||
17 | -import java.util.*; | ||
18 | 5 | ||
19 | @SpringBootApplication | 6 | @SpringBootApplication |
20 | public class PostprocessingApplication { | 7 | public class PostprocessingApplication { |
21 | 8 | ||
22 | - Logger logger = LoggerFactory.getLogger(getClass()); | ||
23 | - | ||
24 | - @Value("${parameters.filename}") | ||
25 | - String parametersFilename; | ||
26 | - @Value("${data.dir}") | ||
27 | - String dataDir; | ||
28 | - @Value("${collectionSample.filename}") | ||
29 | - String collectionSampleFilename; | ||
30 | - @Value("${ratingSample.filename}") | ||
31 | - String ratingSampleFilename; | ||
32 | - @Value("${recommandations.filename}") | ||
33 | - String recommandationsFilename; | ||
34 | - String sampleFilename; | ||
35 | - Properties properties; | ||
36 | - Set<Long> sampleItemIds; | ||
37 | - Set<Long> recommendedItemIds; | ||
38 | - Set<Long> sampleUserIds; | ||
39 | - Map<Long, Set<Long>> sampleItemUserIds; | ||
40 | - int recommendableItemCount; | ||
41 | - int recommendedItemCount; | ||
42 | - int recommendableItemUserCount; | ||
43 | - int recommendedItemUserCount; | ||
44 | - | ||
45 | public static void main(String[] args) { | 9 | public static void main(String[] args) { |
46 | SpringApplication.run(PostprocessingApplication.class, args); | 10 | SpringApplication.run(PostprocessingApplication.class, args); |
47 | } | 11 | } |
48 | - | ||
49 | - @Bean | ||
50 | - public CommandLineRunner postprocess() { | ||
51 | - return (args) -> this.run(); | ||
52 | - } | ||
53 | - | ||
54 | - public void run() { | ||
55 | - loadParametersProperties(); | ||
56 | - loadSampleFilename(); | ||
57 | - analyzeSample(); | ||
58 | - analyzeRecommendations(); | ||
59 | - computeCoverage(); | ||
60 | - | ||
61 | - } | ||
62 | - | ||
63 | - void computeCoverage() { | ||
64 | - float c1; | ||
65 | - float c2; | ||
66 | - int c3; | ||
67 | - | ||
68 | - logger.trace("Nombre d'objets recommandés {}", recommendedItemCount); | ||
69 | - logger.trace("Nombre d'objets recommandables {}", recommendableItemCount); | ||
70 | - c1 = (float) recommendedItemCount / recommendableItemCount; | ||
71 | - logger.trace("c1 {}", String.format("%.3f", c1)); | ||
72 | - logger.trace("Nombre de couples item-user recommandés {}", recommendedItemUserCount); | ||
73 | - logger.trace("Nombre de couples item-user recommandables {}", recommendableItemUserCount); | ||
74 | - c2 = (float) recommendedItemUserCount / recommendableItemUserCount; | ||
75 | - logger.trace("c2 {}", String.format("%.3f", c2)); | ||
76 | - c3 = recommendedItemCount; | ||
77 | - logger.trace("c3 {}", c3); | ||
78 | - } | ||
79 | - | ||
80 | - void analyzeRecommendations() { | ||
81 | - Reader in = null; | ||
82 | - | ||
83 | - try { | ||
84 | - recommendedItemUserCount = 0; | ||
85 | - recommendedItemIds = new HashSet<>(); | ||
86 | - in = new InputStreamReader(new FileInputStream(dataDir + recommandationsFilename)); | ||
87 | - Iterable<CSVRecord> records = CSVFormat.TDF.withFirstRecordAsHeader().parse(in); | ||
88 | - for (CSVRecord record : records) { | ||
89 | - Long itemId = Long.parseLong(record.get("itemId")); | ||
90 | - Long userId = Long.parseLong(record.get("userId")); | ||
91 | - recommendedItemIds.add(itemId); | ||
92 | - if (sampleItemUserIds.containsKey(itemId)) { | ||
93 | - Set<Long> set = sampleItemUserIds.get(itemId); | ||
94 | - if (!set.contains(userId)) { | ||
95 | - recommendedItemUserCount++; | ||
96 | - } | ||
97 | - } | ||
98 | - } | ||
99 | - recommendedItemCount = recommendedItemIds.size(); | ||
100 | - logger.trace("Nombre d'objets recommandés {}", recommendedItemCount); | ||
101 | - logger.trace("Nombre de couples item-user recommandés {}", recommendedItemUserCount); | ||
102 | - } catch (IOException e) { | ||
103 | - logger.error("analyzeRecommendations IOException : {}", e.getStackTrace()); | ||
104 | - } | ||
105 | - | ||
106 | - } | ||
107 | - | ||
108 | - void analyzeSample() { | ||
109 | - | ||
110 | - Reader in = null; | ||
111 | - try { | ||
112 | - sampleItemIds = new HashSet<>(); | ||
113 | - sampleUserIds = new HashSet<>(); | ||
114 | - sampleItemUserIds = new HashMap<>(); | ||
115 | - in = new InputStreamReader(new FileInputStream(dataDir + sampleFilename)); | ||
116 | - Iterable<CSVRecord> records = CSVFormat.TDF.withFirstRecordAsHeader().parse(in); | ||
117 | - for (CSVRecord record : records) { | ||
118 | - Long itemId = Long.parseLong(record.get("itemId")); | ||
119 | - Long userId = Long.parseLong(record.get("userId")); | ||
120 | - sampleItemIds.add(itemId); | ||
121 | - sampleUserIds.add(userId); | ||
122 | - if (!sampleItemUserIds.containsKey(itemId)) { | ||
123 | - Set<Long> set = new HashSet<>(); | ||
124 | - set.add(userId); | ||
125 | - sampleItemUserIds.put(itemId, set); | ||
126 | - } else { | ||
127 | - Set<Long> set = sampleItemUserIds.get(itemId); | ||
128 | - set.add(userId); | ||
129 | - sampleItemUserIds.put(itemId, set); | ||
130 | - } | ||
131 | - } | ||
132 | - recommendableItemCount = sampleItemIds.size(); | ||
133 | - logger.trace("Nombre d'objets recommandables {}", recommendableItemCount); | ||
134 | - logger.trace("Taille de la matrice item-user {}", sampleItemIds.size() * sampleUserIds.size()); | ||
135 | - | ||
136 | - int sampleCoupleCount = 0; | ||
137 | - for (Long itemId : sampleItemIds) { | ||
138 | - sampleCoupleCount += sampleItemUserIds.get(itemId).size(); | ||
139 | - } | ||
140 | - | ||
141 | - recommendableItemUserCount = sampleItemIds.size() * sampleUserIds.size() - sampleCoupleCount; | ||
142 | - logger.trace("Nombre de couples item-user dans l'échantillon {}", sampleCoupleCount); | ||
143 | - logger.trace("Nombre de couples item-user recommandables {}", recommendableItemUserCount); | ||
144 | - | ||
145 | - } catch (IOException e) { | ||
146 | - logger.error("analyzeSample IOException : {}", e.getStackTrace()); | ||
147 | - } | ||
148 | - | ||
149 | - } | ||
150 | - | ||
151 | - void loadSampleFilename() { | ||
152 | - if (!properties.containsKey("ratings")) { | ||
153 | - return; | ||
154 | - } else { | ||
155 | - logger.trace("ratings {}", properties.getProperty("ratings")); | ||
156 | - if (Boolean.parseBoolean(properties.getProperty("ratings"))) { | ||
157 | - sampleFilename = ratingSampleFilename; | ||
158 | - } else { | ||
159 | - sampleFilename = collectionSampleFilename; | ||
160 | - } | ||
161 | - | ||
162 | - logger.trace("sampleFilename {}", sampleFilename); | ||
163 | - } | ||
164 | - } | ||
165 | - | ||
166 | - void loadParametersProperties() { | ||
167 | - | ||
168 | - Properties properties = new Properties(); | ||
169 | - InputStream in = null; | ||
170 | - try { | ||
171 | - in = new FileInputStream(dataDir + parametersFilename); | ||
172 | - properties.load(in); | ||
173 | - in.close(); | ||
174 | - } catch (IOException e) { | ||
175 | - logger.error("loadParametersProperties IOException : {}", e.getStackTrace()); | ||
176 | - } | ||
177 | - | ||
178 | - this.properties = properties; | ||
179 | - } | ||
180 | - | ||
181 | } | 12 | } | ... | ... |
1 | +package org.legrog.recommendation.postprocess; | ||
2 | + | ||
3 | + | ||
/**
 * Immutable holder for the three coverage indicators of a postprocessing run.
 *
 * <p>The class only stores the values it is given. As computed by {@code PostprocessingExpert},
 * {@code c1} is the ratio of recommended items to recommendable items, {@code c2} the ratio of
 * recommended item-user pairs to recommendable item-user pairs, and {@code c3} the number of
 * distinct recommended items.
 */
public class PostprocessingCoverage {

    private final float c1;
    private final float c2;
    private final int c3;

    /**
     * Creates a coverage result.
     *
     * @param c1 first coverage indicator
     * @param c2 second coverage indicator
     * @param c3 third coverage indicator
     */
    public PostprocessingCoverage(float c1, float c2, int c3) {
        this.c1 = c1;
        this.c2 = c2;
        this.c3 = c3;
    }

    /** @return the first coverage indicator */
    public float getC1() {
        return c1;
    }

    /** @return the second coverage indicator */
    public float getC2() {
        return c2;
    }

    /** @return the third coverage indicator */
    public int getC3() {
        return c3;
    }

    /**
     * Two coverages are equal when all three indicators are equal.
     * Floats are compared with {@link Float#compare} so that NaN equals NaN.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (other == null || getClass() != other.getClass()) {
            return false;
        }
        PostprocessingCoverage that = (PostprocessingCoverage) other;
        return Float.compare(c1, that.c1) == 0
                && Float.compare(c2, that.c2) == 0
                && c3 == that.c3;
    }

    @Override
    public int hashCode() {
        int result = Float.hashCode(c1);
        result = 31 * result + Float.hashCode(c2);
        result = 31 * result + c3;
        return result;
    }

    @Override
    public String toString() {
        return "PostprocessingCoverage{c1=" + c1 + ", c2=" + c2 + ", c3=" + c3 + '}';
    }
}
1 | +package org.legrog.recommendation.postprocess; | ||
2 | + | ||
3 | + | ||
4 | +import org.slf4j.Logger; | ||
5 | +import org.slf4j.LoggerFactory; | ||
6 | + | ||
7 | +import java.util.*; | ||
8 | + | ||
9 | +public class PostprocessingExpert { | ||
10 | + | ||
11 | + List<PostprocessingSample> sampleList; | ||
12 | + List<PostprocessingSample> recommendationList; | ||
13 | + Logger logger = LoggerFactory.getLogger(getClass()); | ||
14 | + | ||
15 | + Set<Long> sampleItemIds; | ||
16 | + Set<Long> recommendedItemIds; | ||
17 | + Set<Long> sampleUserIds; | ||
18 | + Map<Long, Set<Long>> sampleItemUserIds; | ||
19 | + int recommendableItemCount; | ||
20 | + int recommendedItemCount; | ||
21 | + int recommendableItemUserCount; | ||
22 | + int recommendedItemUserCount; | ||
23 | + | ||
24 | + public PostprocessingExpert(List<PostprocessingSample> sampleList, List<PostprocessingSample> recommendationList) { | ||
25 | + this.sampleList = sampleList; | ||
26 | + this.recommendationList = recommendationList; | ||
27 | + } | ||
28 | + | ||
29 | + public PostprocessingCoverage getCoverage() { | ||
30 | + analyzeSample(); | ||
31 | + analyzeRecommendations(); | ||
32 | + return computeCoverage(); | ||
33 | + } | ||
34 | + | ||
35 | + protected void analyzeSample() { | ||
36 | + | ||
37 | + sampleItemIds = new HashSet<>(); | ||
38 | + sampleUserIds = new HashSet<>(); | ||
39 | + sampleItemUserIds = new HashMap<>(); | ||
40 | + | ||
41 | + for (PostprocessingSample sample : sampleList) { | ||
42 | + Long itemId = sample.getItemId(); | ||
43 | + Long userId = sample.getUserId(); | ||
44 | + sampleItemIds.add(itemId); | ||
45 | + sampleUserIds.add(userId); | ||
46 | + if (!sampleItemUserIds.containsKey(itemId)) { | ||
47 | + Set<Long> set = new HashSet<>(); | ||
48 | + set.add(userId); | ||
49 | + sampleItemUserIds.put(itemId, set); | ||
50 | + } else { | ||
51 | + Set<Long> set = sampleItemUserIds.get(itemId); | ||
52 | + set.add(userId); | ||
53 | + sampleItemUserIds.put(itemId, set); | ||
54 | + } | ||
55 | + } | ||
56 | + | ||
57 | + recommendableItemCount = sampleItemIds.size(); | ||
58 | + logger.trace("Nombre d'objets recommandables {}", recommendableItemCount); | ||
59 | + logger.trace("Taille de la matrice item-user {}", sampleItemIds.size() * sampleUserIds.size()); | ||
60 | + | ||
61 | + int sampleCoupleCount = 0; | ||
62 | + for (Long itemId : sampleItemIds) { | ||
63 | + sampleCoupleCount += sampleItemUserIds.get(itemId).size(); | ||
64 | + } | ||
65 | + | ||
66 | + recommendableItemUserCount = sampleItemIds.size() * sampleUserIds.size() - sampleCoupleCount; | ||
67 | + logger.trace("Nombre de couples item-user dans l'échantillon {}", sampleCoupleCount); | ||
68 | + logger.trace("Nombre de couples item-user recommandables {}", recommendableItemUserCount); | ||
69 | + } | ||
70 | + | ||
71 | + protected void analyzeRecommendations() { | ||
72 | + recommendedItemUserCount = 0; | ||
73 | + recommendedItemIds = new HashSet<>(); | ||
74 | + for (PostprocessingSample reco : recommendationList) { | ||
75 | + Long itemId = reco.getItemId(); | ||
76 | + Long userId = reco.getUserId(); | ||
77 | + recommendedItemIds.add(itemId); | ||
78 | + if (sampleItemUserIds.containsKey(itemId)) { | ||
79 | + Set<Long> set = sampleItemUserIds.get(itemId); | ||
80 | + if (!set.contains(userId)) { | ||
81 | + recommendedItemUserCount++; | ||
82 | + } | ||
83 | + } | ||
84 | + } | ||
85 | + recommendedItemCount = recommendedItemIds.size(); | ||
86 | + logger.trace("Nombre d'objets recommandés {}", recommendedItemCount); | ||
87 | + logger.trace("Nombre de couples item-user recommandés {}", recommendedItemUserCount); | ||
88 | + } | ||
89 | + | ||
90 | + protected PostprocessingCoverage computeCoverage() { | ||
91 | + float c1; | ||
92 | + float c2; | ||
93 | + int c3; | ||
94 | + | ||
95 | + logger.trace("Nombre d'objets recommandés {}", recommendedItemCount); | ||
96 | + logger.trace("Nombre d'objets recommandables {}", recommendableItemCount); | ||
97 | + c1 = (float) recommendedItemCount / recommendableItemCount; | ||
98 | + logger.trace("c1 {}", String.format("%.3f", c1)); | ||
99 | + logger.trace("Nombre de couples item-user recommandés {}", recommendedItemUserCount); | ||
100 | + logger.trace("Nombre de couples item-user recommandables {}", recommendableItemUserCount); | ||
101 | + c2 = (float) recommendedItemUserCount / recommendableItemUserCount; | ||
102 | + logger.trace("c2 {}", String.format("%.3f", c2)); | ||
103 | + c3 = recommendedItemCount; | ||
104 | + logger.trace("c3 {}", c3); | ||
105 | + | ||
106 | + return new PostprocessingCoverage(c1,c2, c3); | ||
107 | + } | ||
108 | + | ||
109 | + | ||
110 | + | ||
111 | + | ||
112 | + | ||
113 | +} |
1 | +package org.legrog.recommendation.postprocess; | ||
2 | + | ||
3 | +import org.apache.commons.csv.CSVFormat; | ||
4 | +import org.apache.commons.csv.CSVRecord; | ||
5 | +import org.slf4j.Logger; | ||
6 | +import org.slf4j.LoggerFactory; | ||
7 | +import org.springframework.beans.factory.annotation.Value; | ||
8 | +import org.springframework.boot.ApplicationArguments; | ||
9 | +import org.springframework.boot.ApplicationRunner; | ||
10 | +import org.springframework.stereotype.Component; | ||
11 | + | ||
12 | +import java.io.*; | ||
13 | +import java.util.List; | ||
14 | +import java.util.Properties; | ||
15 | +import java.util.stream.Collectors; | ||
16 | +import java.util.stream.StreamSupport; | ||
17 | + | ||
18 | +@Component | ||
19 | +public class PostprocessingRunner implements ApplicationRunner { | ||
20 | + | ||
21 | + @Value("${parameters.filename}") | ||
22 | + private String parametersFilename; | ||
23 | + | ||
24 | + @Value("${data.dir}") | ||
25 | + private String dataDir; | ||
26 | + | ||
27 | + @Value("${collectionSample.filename}") | ||
28 | + private String collectionSampleFilename; | ||
29 | + | ||
30 | + @Value("${ratingSample.filename}") | ||
31 | + private String ratingSampleFilename; | ||
32 | + | ||
33 | + @Value("${recommandations.filename}") | ||
34 | + private String recommandationsFilename; | ||
35 | + | ||
36 | + private Logger logger = LoggerFactory.getLogger(getClass()); | ||
37 | + private String sampleFilename; | ||
38 | + | ||
39 | + | ||
40 | + @Override | ||
41 | + public void run(ApplicationArguments args) throws Exception { | ||
42 | + | ||
43 | + loadSampleFilename(); | ||
44 | + List<PostprocessingSample> samples = loadCsvSample(new File(dataDir, sampleFilename)); | ||
45 | + List<PostprocessingSample> recommendations = loadCsvSample(new File(dataDir, recommandationsFilename)); | ||
46 | + | ||
47 | + PostprocessingExpert expert = new PostprocessingExpert(samples, recommendations); | ||
48 | + PostprocessingCoverage coverage = expert.getCoverage(); | ||
49 | + | ||
50 | + //todo write coverage in a file to be read by user | ||
51 | + //... | ||
52 | + } | ||
53 | + | ||
54 | + /** | ||
55 | + * read csv (TDF) file and map it to a list of PostprocessingSample | ||
56 | + * | ||
57 | + * @param file | ||
58 | + * @return | ||
59 | + * @throws PostprocessingException | ||
60 | + */ | ||
61 | + private List<PostprocessingSample> loadCsvSample(File file) throws PostprocessingException { | ||
62 | + try (Reader in = new InputStreamReader(new FileInputStream(file))) { | ||
63 | + Iterable<CSVRecord> records = CSVFormat.TDF.withFirstRecordAsHeader().parse(in); | ||
64 | + | ||
65 | + return StreamSupport.stream(records.spliterator(), false) | ||
66 | + .map((record) -> new PostprocessingSample( | ||
67 | + Long.parseLong(record.get("userId")), | ||
68 | + Long.parseLong(record.get("itemId"))) | ||
69 | + ) | ||
70 | + .collect(Collectors.toList()); | ||
71 | + | ||
72 | + } catch (IOException e) { | ||
73 | + throw new PostprocessingException("Can't read CSV file " + sampleFilename, e); | ||
74 | + } | ||
75 | + | ||
76 | + } | ||
77 | + | ||
78 | + /** | ||
79 | + * read properties file from application.properties parameter.fileName then search for rating property inside | ||
80 | + * depending of which, select rating or collection file as the sample file | ||
81 | + * | ||
82 | + * todo replace this by a command line switch ? | ||
83 | + * | ||
84 | + * @throws PostprocessingException | ||
85 | + */ | ||
86 | + private void loadSampleFilename() throws PostprocessingException { | ||
87 | + try (InputStream in = new FileInputStream(new File(dataDir, parametersFilename))) { | ||
88 | + Properties properties = new Properties(); | ||
89 | + properties.load(in); | ||
90 | + if (properties.containsKey("ratings")) { | ||
91 | + logger.trace("ratings {}", properties.getProperty("ratings")); | ||
92 | + if (Boolean.parseBoolean(properties.getProperty("ratings"))) { | ||
93 | + sampleFilename = ratingSampleFilename; | ||
94 | + } else { | ||
95 | + sampleFilename = collectionSampleFilename; | ||
96 | + } | ||
97 | + } else { | ||
98 | + // by default, takes collection | ||
99 | + sampleFilename = collectionSampleFilename; | ||
100 | + } | ||
101 | + } catch (IOException e) { | ||
102 | + throw new PostprocessingException("Can't read properties file " + parametersFilename, e); | ||
103 | + } | ||
104 | + } | ||
105 | + | ||
106 | + | ||
107 | + private class PostprocessingException extends Exception { | ||
108 | + public PostprocessingException() { | ||
109 | + super(); | ||
110 | + } | ||
111 | + | ||
112 | + public PostprocessingException(String message) { | ||
113 | + super(message); | ||
114 | + } | ||
115 | + | ||
116 | + public PostprocessingException(String message, Throwable cause) { | ||
117 | + super(message, cause); | ||
118 | + } | ||
119 | + | ||
120 | + public PostprocessingException(Throwable cause) { | ||
121 | + super(cause); | ||
122 | + } | ||
123 | + | ||
124 | + protected PostprocessingException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { | ||
125 | + super(message, cause, enableSuppression, writableStackTrace); | ||
126 | + } | ||
127 | + } | ||
128 | +} |
1 | +package org.legrog.recommendation.postprocess; | ||
2 | + | ||
/**
 * Immutable user-item pair, as read from a sample or recommendation file.
 */
public class PostprocessingSample {
    private final Long userId;
    private final Long itemId;

    /**
     * @param userId identifier of the user
     * @param itemId identifier of the item
     */
    public PostprocessingSample(Long userId, Long itemId) {
        this.userId = userId;
        this.itemId = itemId;
    }

    /** @return the identifier of the user */
    public Long getUserId() {
        return userId;
    }

    /** @return the identifier of the item */
    public Long getItemId() {
        return itemId;
    }

    /** Two samples are equal when both their user id and item id are equal (null-safe). */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (other == null || getClass() != other.getClass()) {
            return false;
        }
        PostprocessingSample that = (PostprocessingSample) other;
        return java.util.Objects.equals(userId, that.userId)
                && java.util.Objects.equals(itemId, that.itemId);
    }

    @Override
    public int hashCode() {
        return java.util.Objects.hash(userId, itemId);
    }

    @Override
    public String toString() {
        return "PostprocessingSample{userId=" + userId + ", itemId=" + itemId + '}';
    }
}
-
Please register or login to post a comment