@@ -35,7 +35,8 @@ object CoCoA {
     beta: Double,
     chkptIter: Int,
     testData: RDD[SparseClassificationPoint],
-    debugIter: Int): (Array[Double], RDD[(Int, Double)]) = {
+    debugIter: Int,
+    seed: Int): (Array[Double], RDD[(Int, Double)]) = {
 
     val parts = data.partitions.size // number of partitions of the data, K in the paper
     println("\nRunning CoCoA on " + n + " data examples, distributed over " + parts + " workers")
@@ -45,16 +46,16 @@ object CoCoA {
     var w = wInit
     val scaling = beta / parts;
 
-    for (t <- 1 until numRounds+1) {
+    for (t <- 1 to numRounds) {
 
       // zip alpha with data
       val zipData = alpha.zip(data)
 
       // find updates to alpha, w
-      val updates = zipData.mapPartitions(partitionUpdate(_, w, localIters, lambda, n, scaling), preservesPartitioning = true).persist()
+      val updates = zipData.mapPartitions(partitionUpdate(_, w, localIters, lambda, n, scaling, seed + t), preservesPartitioning = true).persist()
       alpha = updates.map(kv => kv._2)
       val primalVariables = updates.map(kv => kv._1)
-      val primalUpdates = primalVariables.mapPartitions(singleElementFromPartition, preservesPartitioning = true).reduce(_ plus _)
+      val primalUpdates = primalVariables.mapPartitions(x => Iterator(x.next())).reduce(_ plus _)
       w = primalUpdates.times(scaling).plus(w)
 
       // optionally calculate errors
@@ -75,13 +76,6 @@ object CoCoA {
     return (w, alpha)
   }
 
-  private def singleElementFromPartition(
-    primalVariables: Iterator[Array[Double]]): Iterator[Array[Double]] = {
-    var wVectorList = List[Array[Double]]()
-    wVectorList = primalVariables.next() :: wVectorList
-    return wVectorList.iterator
-  }
-
   /**
    * Performs one round of local updates using a given local dual algorithm,
    * here localSDCA. Will perform localIters many updates per worker.
@@ -92,6 +86,7 @@ object CoCoA {
    * @param lambda
    * @param n
    * @param scaling this is the scaling factor beta/K in the paper
+   * @param seed
    * @return
    */
   private def partitionUpdate(
@@ -100,14 +95,15 @@ object CoCoA {
     localIters: Int,
     lambda: Double,
     n: Int,
-    scaling: Double): Iterator[(Array[Double], (Int, Double))] = {
+    scaling: Double,
+    seed: Int): Iterator[(Array[Double], (Int, Double))] = {
 
     val zipArr = zipData.toArray
     var localData = zipArr.map(x => x._2)
     var alpha = zipArr.map(x => x._1._2)
     val indices = (0 to localData.length-1).map(x => localData(x).index).toArray
     val alphaOld = alpha.clone
-    val (deltaAlpha, deltaW) = localSDCA(localData, wInit, localIters, lambda, n, alpha, alphaOld)
+    val (deltaAlpha, deltaW) = localSDCA(localData, wInit, localIters, lambda, n, alpha, alphaOld, seed)
 
     alpha = alphaOld.plus(deltaAlpha.times(scaling))
     var wArray = Array.fill(localData.length)(Array(0.0))
@@ -134,6 +130,7 @@ object CoCoA {
    * @param n global number of points (needed for the primal-dual correspondence)
    * @param alpha
    * @param alphaOld
+   * @param seed
    * @return deltaAlpha and deltaW, summarizing the performed local changes, see paper
    */
   def localSDCA(
@@ -143,10 +140,11 @@ object CoCoA {
     lambda: Double,
     n: Int,
     alpha: Array[Double],
-    alphaOld: Array[Double]): (Array[Double], Array[Double]) = {
+    alphaOld: Array[Double],
+    seed: Int): (Array[Double], Array[Double]) = {
     var w = wInit
     val nLocal = localData.length
-    var r = new scala.util.Random
+    var r = new scala.util.Random(seed)
     var deltaW = Array.fill(wInit.length)(0.0)
 
     // perform local updates
@@ -172,7 +170,7 @@ object CoCoA {
       val qii = x.dot(x)
       var newAlpha = 1.0
       if (qii != 0.0) {
-        newAlpha = Math.min(Math.max(alpha(idx) - grad / qii, 0.0), 1.0)
+        newAlpha = Math.min(Math.max((alpha(idx) - (grad / qii)), 0.0), 1.0)
       }
 
       // update primal and dual variables
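
Note: the sketch below is not part of the commit; it only illustrates the intent of threading a seed through partitionUpdate and localSDCA and deriving each round's seed as seed + t. Seeding the per-partition scala.util.Random makes the sequence of locally updated coordinates reproducible across runs while still varying between rounds. All names in the sketch are hypothetical.

// Hypothetical, self-contained illustration of the seeding scheme; not the
// CoCoA implementation itself.
object SeedingSketch {
  // Mirrors the seeded RNG created inside localSDCA: the same seed always
  // yields the same sequence of local coordinate indices.
  def localPicks(seed: Int, nLocal: Int, iters: Int): Seq[Int] = {
    val r = new scala.util.Random(seed)
    Seq.fill(iters)(r.nextInt(nLocal))
  }

  def main(args: Array[String]): Unit = {
    val baseSeed = 42
    for (t <- 1 to 3) { // rounds, mirroring "seed + t" in the driver loop
      val picks = localPicks(baseSeed + t, nLocal = 10, iters = 5)
      println("round " + t + " picks: " + picks.mkString(","))
    }
    // Running the program twice prints identical index sequences, which is the
    // reproducibility the seed argument is meant to provide.
  }
}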