Toggle navigation
Toggle navigation
This project
Loading...
Sign in
grogv3
/
grog-cubi
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Jean-Francois Leveque
2017-05-23 14:13:34 +0200
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
e537c51e7053af9b91c7e039fa1d4c00049475f6
e537c51e
1 parent
aef4a11d
Retraits des solitaires des annotable en une seule passe item puis user
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
66 additions
and
11 deletions
grog-recommendation/grog-recommendation-preprocess/src/main/java/org/legrog/recommendation/preprocess/PreprocessingRunner.java
grog-recommendation/grog-recommendation-preprocess/src/main/java/org/legrog/recommendation/preprocess/PreprocessingRunner.java
View file @
e537c51
...
...
@@ -58,10 +58,12 @@ public class PreprocessingRunner implements ApplicationRunner {
setFilenames
();
List
<
AssociationElement
>
associationElements
=
loadAssociationElements
(
new
File
(
dataDir
,
completeFilename
));
// associationElements = cleanupSmallCounts(associationElements, 1, 1);
List
<
Integer
>
annotateIndexes
=
chooseAnnotated
(
associationElements
,
1
,
1
);
writeSampleAndAnnotated
(
new
File
(
dataDir
,
sampleFilename
),
new
File
(
dataDir
,
annontatedFilename
),
annotateIndexes
,
associationElements
);
List
<
AssociationElement
>
annotableElements
=
removeFirstSmallCounts
(
associationElements
,
1
,
1
);
List
<
Integer
>
annotateIndexes
=
chooseAnnotated
(
associationElements
,
annotableElements
,
1
,
1
);
writeSampleAndAnnotated
(
new
File
(
dataDir
,
sampleFilename
),
new
File
(
dataDir
,
annontatedFilename
),
annotateIndexes
,
associationElements
,
annotableElements
);
}
// TODO remove the code duplication between cleanupSmallCounts and removeFirstSmallCounts
private
List
<
AssociationElement
>
cleanupSmallCounts
(
List
<
AssociationElement
>
associationElements
,
int
userSize
,
int
itemSize
)
{
boolean
removedUser
;
...
...
@@ -114,9 +116,59 @@ public class PreprocessingRunner implements ApplicationRunner {
return
associationElements
;
}
private
List
<
Integer
>
chooseAnnotated
(
List
<
AssociationElement
>
associationElements
,
int
userSize
,
int
itemSize
)
{
private
List
<
AssociationElement
>
removeFirstSmallCounts
(
List
<
AssociationElement
>
associationElements
,
int
userSize
,
int
itemSize
)
{
boolean
removedUser
;
boolean
removedItem
;
long
userCount
;
long
itemCount
;
Set
<
Long
>
itemIdSet
;
Set
<
Long
>
userIdSet
;
removedUser
=
false
;
removedItem
=
false
;
// Books or ratings are more alone than users, so we start with them
itemIdSet
=
associationElements
.
stream
().
map
(
element
->
element
.
getItemId
()).
collect
(
Collectors
.
toSet
());
for
(
Long
itemId
:
itemIdSet
)
{
userCount
=
associationElements
.
stream
().
filter
(
element
->
element
.
getItemId
()
==
itemId
).
count
();
if
(
userCount
<=
userSize
)
{
associationElements
=
associationElements
.
stream
().
filter
(
element
->
element
.
getItemId
()
!=
itemId
).
collect
(
Collectors
.
toList
());
if
(!
removedItem
)
{
removedItem
=
true
;
logger
.
debug
(
"Removed first item"
);
}
logger
.
trace
(
"Removed item {}"
,
itemId
);
}
}
logger
.
debug
(
"Remaining AssociationElement count {}"
,
associationElements
.
size
());
// Then we remove users
userIdSet
=
associationElements
.
stream
().
map
(
element
->
element
.
getUserId
()).
collect
(
Collectors
.
toSet
());
for
(
Long
userId
:
userIdSet
)
{
itemCount
=
associationElements
.
stream
().
filter
(
element
->
element
.
getUserId
()
==
userId
).
count
();
if
(
itemCount
<=
itemSize
)
{
associationElements
=
associationElements
.
stream
().
filter
(
element
->
element
.
getUserId
()
!=
userId
).
collect
(
Collectors
.
toList
());
if
(!
removedUser
)
{
removedUser
=
true
;
logger
.
debug
(
"Removed first user"
);
}
logger
.
trace
(
"Removed user {}"
,
userId
);
}
}
logger
.
debug
(
"Remaining AssociationElement count {}"
,
associationElements
.
size
());
logger
.
debug
(
"Remover item or user {}"
,
removedUser
||
removedItem
);
return
associationElements
;
}
private
List
<
Integer
>
chooseAnnotated
(
List
<
AssociationElement
>
annotableElements
,
List
<
AssociationElement
>
associationElements
,
int
userSize
,
int
itemSize
)
{
List
<
Integer
>
annotatedChosen
=
new
ArrayList
<>();
int
size
=
a
ssociation
Elements
.
size
();
int
size
=
a
nnotable
Elements
.
size
();
long
userCount
=
0
;
long
itemCount
=
0
;
AssociationElement
randomAssociationElement
;
...
...
@@ -128,16 +180,17 @@ public class PreprocessingRunner implements ApplicationRunner {
randomInteger
=
new
Integer
(
random
.
nextInt
(
size
));
if
(!
annotatedChosen
.
contains
(
randomInteger
))
{
randomAssociationElement
=
a
ssociation
Elements
.
get
(
randomInteger
);
randomAssociationElement
=
a
nnotable
Elements
.
get
(
randomInteger
);
final
Long
itemId
=
randomAssociationElement
.
getItemId
();
final
Long
userId
=
randomAssociationElement
.
getUserId
();
userCount
=
associationElements
.
stream
().
filter
(
element
->
element
.
getItemId
()
==
itemId
).
count
();
itemCount
=
associationElements
.
stream
().
filter
(
element
->
element
.
getUserId
()
==
userId
).
count
();
logger
.
trace
(
"Checking new AssociationElement for annotation"
);
// Decreasing values based on planned suppressions
// TODO Refactor writeSampleAndAnnotated and chooseAnnotated to avoid this
for
(
Integer
annotatedIndex
:
annotatedChosen
)
{
checkingAssociationElement
=
a
ssociation
Elements
.
get
(
annotatedIndex
);
checkingAssociationElement
=
a
nnotable
Elements
.
get
(
annotatedIndex
);
if
(
checkingAssociationElement
.
getUserId
()
==
userId
)
{
userCount
--;
}
...
...
@@ -148,6 +201,7 @@ public class PreprocessingRunner implements ApplicationRunner {
if
(
userCount
>
userSize
&&
itemCount
>
itemSize
)
{
annotatedChosen
.
add
(
randomInteger
);
logger
.
debug
(
"Adding new AssociationElement to annotated, total is {}"
,
annotatedChosen
.
size
());
}
}
}
...
...
@@ -155,9 +209,10 @@ public class PreprocessingRunner implements ApplicationRunner {
return
annotatedChosen
;
}
private
void
writeSampleAndAnnotated
(
File
sampleFile
,
File
annotatedFile
,
List
<
Integer
>
annotateIndexes
,
List
<
AssociationElement
>
associationElements
)
throws
PreprocessingException
{
private
void
writeSampleAndAnnotated
(
File
sampleFile
,
File
annotatedFile
,
List
<
Integer
>
annotateIndexes
,
List
<
AssociationElement
>
associationElements
,
List
<
AssociationElement
>
annotableElements
)
throws
PreprocessingException
{
try
{
AssociationElement
associationElement
;
Integer
annotableIndex
;
if
(
ratings
)
{
RatingElement
ratingElement
;
CSVFormat
ratingsFormat
=
CSVFormat
.
TDF
.
withHeader
(
"itemId"
,
"userId"
,
"rating"
);
...
...
@@ -166,8 +221,8 @@ public class PreprocessingRunner implements ApplicationRunner {
for
(
int
i
=
0
;
i
<
associationElements
.
size
();
i
++)
{
ratingElement
=
(
RatingElement
)
associationElements
.
get
(
i
);
Integer
index
=
new
Integer
(
i
);
if
(
annota
teIndexes
.
contains
(
i
ndex
))
{
annotableIndex
=
new
Integer
(
annotableElements
.
indexOf
(
ratingElement
)
);
if
(
annota
bleIndex
>=
0
&&
annotateIndexes
.
contains
(
annotableI
ndex
))
{
annotatedPrinter
.
printRecord
(
ratingElement
.
getItemId
(),
ratingElement
.
getUserId
(),
ratingElement
.
getRating
());
}
else
{
samplePrinter
.
printRecord
(
ratingElement
.
getItemId
(),
ratingElement
.
getUserId
(),
ratingElement
.
getRating
());
...
...
@@ -183,8 +238,8 @@ public class PreprocessingRunner implements ApplicationRunner {
for
(
int
i
=
0
;
i
<
associationElements
.
size
();
i
++)
{
associationElement
=
associationElements
.
get
(
i
);
Integer
index
=
new
Integer
(
i
);
if
(
annota
teIndexes
.
contains
(
i
ndex
))
{
annotableIndex
=
new
Integer
(
annotableElements
.
indexOf
(
associationElement
)
);
if
(
annota
bleIndex
>=
0
&&
annotateIndexes
.
contains
(
annotableI
ndex
))
{
annotatedPrinter
.
printRecord
(
associationElement
.
getItemId
(),
associationElement
.
getUserId
());
}
else
{
samplePrinter
.
printRecord
(
associationElement
.
getItemId
(),
associationElement
.
getUserId
());
...
...
Please
register
or
login
to post a comment