Compare b863363 ... +69 ... 5a20c50

Showing 8 of 112 files from the diff.
Other files ignored by Codecov
requirements.txt has changed.
setup.py has changed.
MANIFEST.in has changed.
tox.ini has changed.

@@ -253,6 +253,8 @@
253 253
                self.sub_audits[pair].distribution_null = convolve(self.sub_audits[pair].distribution_null,
254 254
                                                                   distribution_round_draw,
255 255
                                                                   method='direct')
256 +
                self.sub_audits[pair].distribution_null = [abs(p) for p in self.sub_audits[pair].distribution_null]
257 +
 
256 258
        else:
257 259
            half_contest_ballots = math.floor(sub_audit.sub_contest.contest_ballots / 2)
258 260
            if len(self.rounds) == 1:
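A minimal sketch of the pattern behind the two added lines (illustrative only, not part of the diff): after combining two probability vectors with scipy.signal.convolve, tiny negative entries can appear in floating-point convolution output (particularly with FFT-based methods), and taking abs() of each entry restores a non-negative distribution before it is used further.

# Illustrative sketch; dist and draw are hypothetical stand-ins for
# distribution_null and distribution_round_draw.
from scipy.signal import convolve

dist = [0.25, 0.5, 0.25]
draw = [0.5, 0.5]
combined = convolve(dist, draw, method='direct')
combined = [abs(p) for p in combined]  # guard against tiny negative round-off values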
@@ -337,6 +339,7 @@
337 339
                self.sub_audits[pair].distribution_reported_tally = convolve(sub_audit.distribution_reported_tally,
338 340
                                                                             distribution_round_draw,
339 341
                                                                             method='direct')
342 +
                self.sub_audits[pair].distribution_reported_tally = [abs(p) for p in self.sub_audits[pair].distribution_reported_tally]
340 343
        else:
341 344
            reported_winner_ballots = int(sub_audit.sub_contest.winner_prop * sub_audit.sub_contest.contest_ballots)
342 345
            if len(self.rounds) == 1:
@@ -519,7 +522,7 @@
519 522
        round risk and stopping probability are stored.
520 523
        """
521 524
522 -
        self.__reset()
525 +
        self._reset()
523 526
        click.echo('\n==================\nBeginning Audit...\n==================\n')
524 527
        # FIXME: no overall minimum sample size exists, so max of all sub audit mins used
525 528
        sample_size = max(sub_audit.min_sample_size for sub_audit in self.sub_audits.values())
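The rename from __reset to _reset matters because double-underscore attribute names are name-mangled by Python and become effectively unreachable from outside the class; presumably the single underscore is used so the simulation classes added below can call self.audit._reset() between trials. A minimal sketch of the difference (illustrative only, not r2b2 code):

# Name mangling in a nutshell
class Audit:
    def __reset(self):   # stored as _Audit__reset
        pass

    def _reset(self):    # conventionally private, still callable externally
        pass

a = Audit()
a._reset()               # works
# a.__reset()            # AttributeError: 'Audit' object has no attribute '__reset'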
@@ -603,14 +606,17 @@
603 606
604 607
        click.echo('\n\nAudit Complete: Reached max sample size.')
605 608
606 -
    def __reset(self):
609 +
    def _reset(self):
607 610
        """Reset attributes modified during run()."""
608 611
609 612
        self.rounds = []
610 613
        self.sample_winner_ballots = []
611 614
        self.pvalue_schedule = []
612 615
        for loser in self.sub_audits.keys():
613 616
            self.sub_audits[loser]._reset()
617 +
        self.sample_ballots = {}
618 +
        for candidate in self.contest.candidates:
619 +
            self.sample_ballots[candidate] = []
614 620
        self.stopped = False
615 621
616 622
    @abstractmethod

@@ -0,0 +1,1253 @@
1 +
import math
2 +
import random as r
3 +
from typing import List
4 +
from typing import Tuple
5 +
6 +
from r2b2.minerva import Minerva
7 +
from r2b2.simulator import Simulation
8 +
from r2b2.simulator import histogram
9 +
10 +
11 +
class MinervaOneRoundRisk(Simulation):
12 +
    """Simulate a 1-round Minerva audit for a given sample size to compute risk limit."""
13 +
    sample_size: int
14 +
    total_relevant_ballots: int
15 +
    vote_dist: List[Tuple[str, int]]
16 +
    audit: Minerva
17 +
18 +
    def __init__(self,
19 +
                 alpha,
20 +
                 reported,
21 +
                 sample_size,
22 +
                 db_mode=True,
23 +
                 db_host='localhost',
24 +
                 db_name='r2b2',
25 +
                 db_port=27017,
26 +
                 user='writer',
27 +
                 pwd='icanwrite',
28 +
                 *args,
29 +
                 **kwargs):
30 +
        super().__init__('minerva', alpha, reported, 'tie', True, db_mode, db_host, db_port, db_name, user, pwd, *args, **kwargs)
31 +
        self.sample_size = sample_size
32 +
        self.total_relevant_ballots = sum(self.reported.tally.values())
33 +
        # FIXME: temporary until pairwise contest fix is implemented
34 +
        self.contest_ballots = self.reported.contest_ballots
35 +
        self.reported.contest_ballots = self.total_relevant_ballots
36 +
        self.reported.winner_prop = self.reported.tally[self.reported.reported_winners[0]] / self.reported.contest_ballots
37 +
        self.audit = Minerva(self.alpha, 1.0, self.reported)
38 +
39 +
        if sample_size < self.audit.min_sample_size:
40 +
            raise ValueError('Sample size is less than minimum sample size for audit.')
41 +
42 +
        # FIXME: sorted candidate list will be created by new branch, update once merged
43 +
        # Generate a sorted underlying vote distribution
44 +
        sorted_tally = sorted(self.reported.tally.items(), key=lambda x: x[1], reverse=True)
45 +
        self.vote_dist = [(sorted_tally[0][0], self.total_relevant_ballots // 2)]
46 +
        for i in range(1, len(sorted_tally)):
47 +
            self.vote_dist.append((sorted_tally[i][0], self.total_relevant_ballots))
48 +
        self.vote_dist.append(('invalid', self.contest_ballots))
49 +
50 +
    def trial(self, seed):
51 +
        """Execute a 1-round minerva audit (using r2b2.minerva.Minerva)"""
52 +
53 +
        r.seed(seed)
54 +
55 +
        # Draw a sample of a given size
56 +
        sample = [0 for i in range(len(self.vote_dist))]
57 +
        for i in range(self.sample_size):
58 +
            ballot = r.randint(1, self.contest_ballots)
59 +
            for j in range(len(sample)):
60 +
                if ballot <= self.vote_dist[j][1]:
61 +
                    sample[j] += 1
62 +
                    break
63 +
64 +
        relevant_sample_size = self.sample_size - sample[-1]
65 +
66 +
        # Perform audit computations
67 +
        self.audit._reset()
68 +
        self.audit.rounds.append(relevant_sample_size)
69 +
        self.audit.current_dist_null()
70 +
        self.audit.current_dist_reported()
71 +
        p_value = self.audit.compute_risk(sample[0], relevant_sample_size)
72 +
        if p_value <= self.alpha:
73 +
            stop = True
74 +
        else:
75 +
            stop = False
76 +
77 +
        return {
78 +
            'stop': stop,
79 +
            'p_value': p_value,
80 +
            'sample_size': self.sample_size,
81 +
            'relevant_sample_size': relevant_sample_size,
82 +
            'winner_ballots': sample[0]
83 +
        }
84 +
85 +
    def analyze(self, verbose: bool = False, hist: bool = False):
86 +
        """Analyze trials to get experimental risk.
87 +
88 +
        Args:
89 +
            verbose (bool): If true, analyze will print simulation analysis information.
90 +
            hist (bool): If true, analyze will generate and display 2 histograms: winner
91 +
                ballots found in the sample size and computed risk.
92 +
        """
93 +
        if self.db_mode:
94 +
            trials = self.db.trial_lookup(self.sim_id)
95 +
        else:
96 +
            trials = self.trials
97 +
        num_trials = 0
98 +
        stopped = 0
99 +
        total_risk = 0
100 +
        total_relevant_sampled = 0
101 +
        winner_ballot_dist = []
102 +
        risk_dist = []
103 +
104 +
        for trial in trials:
105 +
            num_trials += 1
106 +
            if trial['stop']:
107 +
                stopped += 1
108 +
109 +
            total_relevant_sampled += trial['relevant_sample_size']
110 +
            winner_ballot_dist.append(trial['winner_ballots'])
111 +
            total_risk += trial['p_value']
112 +
            risk_dist.append(trial['p_value'])
113 +
114 +
        if verbose:
115 +
            print('Analysis\n========')
116 +
            print('Underlying election is tied\n')
117 +
            print('Number of trials: {}'.format(num_trials))
118 +
            print('Number of stopped: {}'.format(stopped))
119 +
            print('Risk Limit: {:%}'.format(self.alpha))
120 +
            print('Risk Computed: {:%}'.format(stopped / num_trials))
121 +
        if hist:
122 +
            histogram(winner_ballot_dist, 'Winner ballots found in sample of size: {}'.format(self.sample_size))
123 +
            histogram(risk_dist, 'Risk (p_value) dist.')
124 +
125 +
        # Update simulation entry to include analysis
126 +
        if self.db_mode:
127 +
            self.db.update_analysis(self.sim_id, (stopped / num_trials))
128 +
        return stopped / num_trials
129 +
130 +
131 +
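The sampling loop in trial() relies on vote_dist holding cumulative upper bounds over the range 1..contest_ballots: a uniform draw is credited to the first entry whose bound it does not exceed, and anything above the last relevant bound counts as 'invalid'. A small self-contained sketch with made-up numbers (not taken from the diff):

import random as r

# Hypothetical contest: 110 contest ballots, 100 relevant, tied null hypothesis.
vote_dist = [('A', 50), ('B', 100), ('invalid', 110)]  # cumulative upper bounds
sample = [0, 0, 0]
r.seed(1)
for _ in range(20):                      # hypothetical sample size of 20
    ballot = r.randint(1, 110)
    for j, (_, bound) in enumerate(vote_dist):
        if ballot <= bound:
            sample[j] += 1
            break
relevant_sample_size = 20 - sample[-1]   # drawn ballots that were not 'invalid'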
class MinervaOneRoundStoppingProb(Simulation):
132 +
    """Simulate a 1-round Minerva audit for a given sample size to compute stopping probability."""
133 +
    sample_size: int
134 +
    total_relevant_ballots: int
135 +
    vote_dist: List[Tuple[str, int]]
136 +
    audit: Minerva
137 +
138 +
    def __init__(self,
139 +
                 alpha,
140 +
                 reported,
141 +
                 sample_size,
142 +
                 db_mode=True,
143 +
                 db_host='localhost',
144 +
                 db_name='r2b2',
145 +
                 db_port=27017,
146 +
                 user='writer',
147 +
                 pwd='icanwrite',
148 +
                 *args,
149 +
                 **kwargs):
150 +
        super().__init__('minerva', alpha, reported, 'reported', True, db_mode, db_host, db_port, db_name, user, pwd, *args, **kwargs)
151 +
        self.sample_size = sample_size
152 +
        self.total_relevant_ballots = sum(self.reported.tally.values())
153 +
        # FIXME: temporary until pairwise contest fix is implemented
154 +
        self.contest_ballots = self.reported.contest_ballots
155 +
        self.reported.contest_ballots = self.total_relevant_ballots
156 +
        self.reported.winner_prop = self.reported.tally[self.reported.reported_winners[0]] / self.reported.contest_ballots
157 +
        self.audit = Minerva(self.alpha, 1.0, self.reported)
158 +
159 +
        if sample_size < self.audit.min_sample_size:
160 +
            raise ValueError('Sample size is less than minimum sample size for audit')
161 +
162 +
        # FIXME: sorted candidate list will be created by new branch, update once merged
163 +
        # Generate a sorted underlying vote distribution
164 +
        sorted_tally = sorted(self.reported.tally.items(), key=lambda x: x[1], reverse=True)
165 +
        self.vote_dist = [(sorted_tally[0][0], sorted_tally[0][1])]
166 +
        current = sorted_tally[0][1]
167 +
        for i in range(1, len(sorted_tally)):
168 +
            current += sorted_tally[i][1]
169 +
            self.vote_dist.append((sorted_tally[i][0], current))
170 +
        self.vote_dist.append(('invalid', self.contest_ballots))
171 +
172 +
    def trial(self, seed):
173 +
        """Execute a 1-round minerva audit."""
174 +
175 +
        r.seed(seed)
176 +
177 +
        # Draw a sample
178 +
        sample = [0 for i in range(len(self.vote_dist))]
179 +
        for i in range(self.sample_size):
180 +
            ballot = r.randint(1, self.contest_ballots)
181 +
            for j in range(len(sample)):
182 +
                if ballot <= self.vote_dist[j][1]:
183 +
                    sample[j] += 1
184 +
                    break
185 +
        relevant_sample_size = self.sample_size - sample[-1]
186 +
187 +
        # Perform audit computations
188 +
        self.audit._reset()
189 +
        self.audit.rounds.append(relevant_sample_size)
190 +
        self.audit.current_dist_null()
191 +
        self.audit.current_dist_reported()
192 +
        p_value = self.audit.compute_risk(sample[0], relevant_sample_size)
193 +
        if p_value <= self.alpha:
194 +
            stop = True
195 +
        else:
196 +
            stop = False
197 +
198 +
        return {
199 +
            'stop': stop,
200 +
            'p_value': p_value,
201 +
            'sample_size': self.sample_size,
202 +
            'relevant_sample_size': relevant_sample_size,
203 +
            'winner_ballots': sample[0]
204 +
        }
205 +
206 +
    def analyze(self, verbose: bool = False, hist: bool = False):
207 +
        """Analyse trials to get experimental stopping probability"""
208 +
        if self.db_mode:
209 +
            trials = self.db.trial_lookup(self.sim_id)
210 +
        else:
211 +
            trials = self.trials
212 +
        num_trials = 0
213 +
        stopped = 0
214 +
        winner_ballot_dist = []
215 +
        risk_dist = []
216 +
217 +
        for trial in trials:
218 +
            num_trials += 1
219 +
            if trial['stop']:
220 +
                stopped += 1
221 +
222 +
                winner_ballot_dist.append(trial['winner_ballots'])
223 +
                risk_dist.append(trial['p_value'])
224 +
225 +
        # TODO: insert verbose and histograms
226 +
227 +
        # Update simulation entry to include analysis
228 +
        if self.db_mode:
229 +
            self.db.update_analysis(self.sim_id, (stopped / num_trials))
230 +
        return stopped / num_trials
231 +
232 +
233 +
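The only substantive difference between the two one-round classes above is the underlying election ballots are drawn from: MinervaOneRoundRisk caps the reported winner at half of the relevant ballots (a tie), while MinervaOneRoundStoppingProb uses the cumulative reported tally. A sketch with a hypothetical tally (A=60, B=40, 110 contest ballots):

tally = {'A': 60, 'B': 40}
total_relevant = sum(tally.values())     # 100
contest_ballots = 110                    # 10 irrelevant/invalid ballots

# Risk simulation: draws come from a tied underlying election.
risk_vote_dist = [('A', total_relevant // 2), ('B', total_relevant), ('invalid', contest_ballots)]

# Stopping-probability simulation: draws come from the reported tally.
sprob_vote_dist = [('A', 60), ('B', 60 + 40), ('invalid', contest_ballots)]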
class MinervaOneRoundAlteredMargin(Simulation):
234 +
    """Simulate a 1-round Minerva audit for a given sample size with a correct outcome but incorrect reported margin"""
235 +
    underlying_margin: float
236 +
    sample_size: int
237 +
    total_relevant_ballots: int
238 +
    vote_dist: List[Tuple[str, int]]
239 +
    audit: Minerva
240 +
241 +
    def __init__(self,
242 +
                 alpha,
243 +
                 reported,
244 +
                 underlying,
245 +
                 underlying_margin,
246 +
                 sample_size,
247 +
                 db_mode=True,
248 +
                 db_host='localhost',
249 +
                 db_name='r2b2',
250 +
                 db_port=27017,
251 +
                 user='writer',
252 +
                 pwd='icanwrite',
253 +
                 *args,
254 +
                 **kwargs):
255 +
        super().__init__('minerva', alpha, reported, {
256 +
            'change': underlying,
257 +
            'margin': underlying_margin
258 +
        }, True, db_mode, db_host, db_port, db_name, user, pwd, *args, **kwargs)
259 +
        self.underlying_margin = underlying_margin
260 +
        self.sample_size = sample_size
261 +
        self.total_relevant_ballots = sum(self.reported.tally.values())
262 +
        # FIXME: temporary until pairwise contest fix is implemented
263 +
        self.contest_ballots = self.reported.contest_ballots
264 +
        self.reported.contest_ballots = self.total_relevant_ballots
265 +
        self.reported.winner_prop = self.reported.tally[self.reported.reported_winners[0]] / self.reported.contest_ballots
266 +
        self.audit = Minerva(self.alpha, 1.0, self.reported)
267 +
268 +
        if sample_size < self.audit.min_sample_size:
269 +
            raise ValueError('Sample size is less than minimum sample size for audit')
270 +
271 +
        # FIXME: sorted candidate list will be created by new branch, update once merged
272 +
        # Generate a sorted underlying vote distribution
273 +
        sorted_tally = sorted(self.reported.tally.items(), key=lambda x: x[1], reverse=True)
274 +
        underlying_winner_prop = (1.0 + underlying_margin) / 2.0
275 +
        self.vote_dist = [(sorted_tally[0][0], self.total_relevant_ballots * underlying_winner_prop)]
276 +
        # current = sorted_tally[0][1]
277 +
        # for i in range(1, len(sorted_tally)):
278 +
        #    current += sorted_tally[i][1]
279 +
        #    self.vote_dist.append((sorted_tally[i][0], current))
280 +
        for i in range(1, len(sorted_tally)):
281 +
            self.vote_dist.append((sorted_tally[i][0], self.total_relevant_ballots))
282 +
        self.vote_dist.append(('invalid', self.contest_ballots))
283 +
284 +
    def trial(self, seed):
285 +
        """Execute a 1-round minerva audit."""
286 +
287 +
        r.seed(seed)
288 +
289 +
        # Draw a sample
290 +
        sample = [0 for i in range(len(self.vote_dist))]
291 +
        for i in range(self.sample_size):
292 +
            ballot = r.randint(1, self.contest_ballots)
293 +
            for j in range(len(sample)):
294 +
                if ballot <= self.vote_dist[j][1]:
295 +
                    sample[j] += 1
296 +
                    break
297 +
        relevant_sample_size = self.sample_size - sample[-1]
298 +
299 +
        # Perform audit computations
300 +
        self.audit._reset()
301 +
        self.audit.rounds.append(relevant_sample_size)
302 +
        self.audit.current_dist_null()
303 +
        self.audit.current_dist_reported()
304 +
        p_value = self.audit.compute_risk(sample[0], relevant_sample_size)
305 +
        if p_value <= self.alpha:
306 +
            stop = True
307 +
        else:
308 +
            stop = False
309 +
310 +
        return {
311 +
            'stop': stop,
312 +
            'p_value': p_value,
313 +
            'sample_size': self.sample_size,
314 +
            'relevant_sample_size': relevant_sample_size,
315 +
            'winner_ballots': sample[0]
316 +
        }
317 +
318 +
    def analyze(self, verbose: bool = False, hist: bool = False):
319 +
        """Analyse trials to get experimental stopping probability"""
320 +
        if self.db_mode:
321 +
            trials = self.db.trial_lookup(self.sim_id)
322 +
        else:
323 +
            trials = self.trials
324 +
        num_trials = 0
325 +
        stopped = 0
326 +
        winner_ballot_dist = []
327 +
        total_risk = 0.0
328 +
329 +
        for trial in trials:
330 +
            num_trials += 1
331 +
            total_risk += trial['p_value']
332 +
            if trial['stop']:
333 +
                stopped += 1
334 +
                winner_ballot_dist.append(trial['winner_ballots'])
335 +
336 +
        # TODO: insert verbose and histograms
337 +
338 +
        # Update simulation entry to include analysis
339 +
        analysis = {'avg_p_value': (total_risk / num_trials), 'sprob': (stopped / num_trials)}
        if self.db_mode:
            self.db.update_analysis(self.sim_id, analysis)
342 +
343 +
        return analysis
344 +
345 +
346 +
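In the altered-margin class above, the underlying winner share follows from the margin as (1 + margin) / 2; a quick worked example with hypothetical numbers:

underlying_margin = 0.10                                   # hypothetical
underlying_winner_prop = (1.0 + underlying_margin) / 2.0   # 0.55
winner_bound = 100_000 * underlying_winner_prop            # 55000.0 of 100,000 relevant ballots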
class MinervaMultiRoundStoppingProb(Simulation):
347 +
    """Simulate a multi-round Minerva audit.
348 +
349 +
    If sample_sprob is provided, sample sizes to achieve a sample_sprob
    probability of stopping will be computed and used. Otherwise, the
    initial sample size, sample_size, is given as input and each further
    round draws an additional (sample_mult) * (sample_size) ballots.
353 +
    The audit executes until it stops or reaches the maximum number of rounds.
354 +
    """
355 +
    sample_sprob: float
356 +
    sample_size: int
357 +
    sample_mult: float
358 +
    max_rounds: int
359 +
    total_relevant_ballots: int
360 +
    vote_dist: List[Tuple[str, int]]
361 +
    audit: Minerva
362 +
363 +
    def __init__(self,
364 +
                 alpha,
365 +
                 reported,
366 +
                 max_rounds,
367 +
                 sample_size=None, 
368 +
                 sample_mult=None,
369 +
                 sample_sprob=None,
370 +
                 db_mode=True,
371 +
                 db_host='localhost',
372 +
                 db_name='r2b2',
373 +
                 db_port=27017,
374 +
                 user='writer',
375 +
                 pwd='icanwrite',
376 +
                 *args,
377 +
                 **kwargs):
378 +
        # Add parameters to simulation DB entry
379 +
        if 'sim_args' in kwargs:
380 +
            kwargs['sim_args']['max_rounds'] = max_rounds
381 +
            kwargs['sim_args']['sample_mult'] = sample_mult
382 +
            kwargs['sim_args']['sample_sprob'] = sample_sprob
383 +
        else:
384 +
            kwargs['sim_args'] = {'max_rounds': max_rounds, 'sample_mult': sample_mult, 'sample_sprob': sample_sprob}
385 +
        super().__init__('minerva', alpha, reported, 'reported', True, db_mode, db_host, db_port, db_name, user, pwd, *args, **kwargs)
386 +
        self.sample_sprob = sample_sprob
387 +
        self.sample_size = sample_size
388 +
        self.sample_mult = sample_mult
389 +
        self.max_rounds = max_rounds
390 +
        self.total_relevant_ballots = sum(self.reported.tally.values())
391 +
        # FIXME: temporary until pairwise contest fix is implemented
392 +
        self.contest_ballots = self.reported.contest_ballots
393 +
        #self.reported.contest_ballots = self.total_relevant_ballots
394 +
        #self.reported.winner_prop = self.reported.tally[self.reported.reported_winners[0]] / self.reported.contest_ballots
395 +
        self.audit = Minerva(self.alpha, 1.0, self.reported)
396 +
397 +
        if sample_sprob is None and sample_size is None and sample_mult is None:
398 +
            raise ValueError('Sample sizes cannot be chosen without sample_sprob or sample_size and sample_mult.')
399 +
        if sample_sprob is not None:
400 +
            if not sample_sprob > 0 or not sample_sprob < 1:
401 +
                raise ValueError('Sample size stopping probability is not between 0 and 1.')
402 +
        else:
403 +
            min_sample_size = 0
404 +
            for pairwise_audit in self.audit.sub_audits.values():
405 +
                min_sample_size = max(pairwise_audit.min_sample_size, min_sample_size)
406 +
            if sample_size < min_sample_size:
407 +
                raise ValueError('Sample size is less than minimum sample size for audit.')
408 +
        if max_rounds < 2:
409 +
            raise ValueError('Maximum rounds is too small.')
410 +
411 +
        # FIXME: sorted candidate list will be created by new branch, update once merged
412 +
        # Generate a sorted underlying vote distribution
413 +
        sorted_tally = sorted(self.reported.tally.items(), key=lambda x: x[1], reverse=True)
414 +
        self.vote_dist = [(sorted_tally[0][0], sorted_tally[0][1])]
415 +
        current = sorted_tally[0][1]
416 +
        for i in range(1, len(sorted_tally)):
417 +
            current += sorted_tally[i][1]
418 +
            self.vote_dist.append((sorted_tally[i][0], current))
419 +
        self.vote_dist.append(('invalid', self.contest_ballots))
420 +
421 +
422 +
    def trial(self, seed):
423 +
        """Execute a multiround minerva audit (using r2b2.minerva.Minerva)"""
424 +
425 +
        r.seed(seed)
426 +
427 +
        # Ensure audit is reset
428 +
        self.audit._reset()
429 +
430 +
        # Initialize first round including initial sample size
431 +
        round_num = 1
432 +
        previous_sample_size = 0
433 +
        if self.sample_sprob is not None:
434 +
            current_sample_size = self.audit.next_sample_size(self.sample_sprob)
435 +
        else:
436 +
            current_sample_size = self.sample_size
437 +
            next_sample = math.ceil(self.sample_mult * self.sample_size)
438 +
        stop = False
439 +
440 +
        # For each round
441 +
        sample = [0 for i in range(len(self.vote_dist))]
442 +
        while round_num <= self.max_rounds:
443 +
            # Draw a sample of a given size
444 +
            if current_sample_size <= previous_sample_size:
445 +
                # TODO figure out when this happens and prevent it
446 +
                current_sample_size = previous_sample_size + 1
447 +
            for i in range(current_sample_size - previous_sample_size):
448 +
                ballot = r.randint(1, self.contest_ballots)
449 +
                for j in range(len(sample)):
450 +
                    if ballot <= self.vote_dist[j][1]:
451 +
                        sample[j] += 1
452 +
                        break
453 +
454 +
            # Convert this sample to a dict
455 +
            sample_dict = {}
456 +
            for i in range(len(self.vote_dist)):
457 +
                # For now, we will ignore the irrelevant votes for this simulation
458 +
                if not self.vote_dist[i][0] == 'invalid':
459 +
                    sample_dict[self.vote_dist[i][0]] = sample[i]
460 +
461 +
            relevant_sample_size = current_sample_size - sample[-1]
462 +
463 +
            # Execute a round of the audit for this sample
464 +
            stop = self.audit.execute_round(current_sample_size, sample_dict)
465 +
466 +
            # If audit is done, return trial output
467 +
            # FIXME: Improve output format
468 +
            if stop:
469 +
                return {
470 +
                    'stop': stop,
471 +
                    'round': round_num,
472 +
                    'p_value_sched': self.audit.pvalue_schedule,
473 +
                    'p_value': self.audit.get_risk_level(),
474 +
                    'relevant_sample_size_sched': self.audit.rounds,
475 +
                    'winner_ballots_drawn_sched': self.audit.sample_ballots
476 +
                    #'kmin_sched': self.audit.min_winner_ballots
477 +
                }
478 +
479 +
            # Else choose a next round size and continue
480 +
            round_num += 1
481 +
            previous_sample_size = current_sample_size
482 +
            if self.sample_sprob is not None:
483 +
                current_sample_size = self.audit.next_sample_size(self.sample_sprob)
484 +
            else:
485 +
                current_sample_size += next_sample
486 +
                next_sample = math.ceil(self.sample_mult * self.sample_size)
487 +
488 +
        # If audit does not stop, return trial output
489 +
        # FIXME: Improve output format
490 +
        return {
491 +
            'stop': stop,
492 +
            'round': self.max_rounds,
493 +
            'p_value_sched': self.audit.pvalue_schedule,
494 +
            'p_value': self.audit.get_risk_level(),
495 +
            'relevant_sample_size_sched': self.audit.rounds,
496 +
            'winner_ballots_drawn_sched': self.audit.sample_ballots
497 +
            #'kmin_sched': self.audit.min_winner_ballots
498 +
        }
499 +
500 +
    def analyze(self, verbose: bool = False, hist: bool = False):
501 +
        """Analyze trials to get experimental stopping probability.
502 +
503 +
        Args:
504 +
            verbose (bool): If true, analyze will print simulation analysis information.
505 +
            hist (bool): If true, analyze will generate and display a histogram of
                the rounds in which trials stopped.
507 +
        """
508 +
        if self.db_mode:
509 +
            trials = self.db.trial_lookup(self.sim_id)
510 +
        else:
511 +
            trials = self.trials
512 +
        num_trials = 0
513 +
        stopped = 0
514 +
        rounds_stopped = []
515 +
        # TODO: Create additional structures to store trial data
516 +
        
517 +
        for trial in trials:
518 +
            num_trials += 1
519 +
            if trial['stop']:
520 +
                stopped += 1
521 +
                rounds_stopped.append(trial['round'])
522 +
            # TODO: Extract more data from trial
523 +
524 +
        if verbose:
525 +
            print('Analysis\n========\n')
526 +
            print('Number of trials: {}'.format(num_trials))
527 +
            print('Experimental Stopping Prob: {:.5f}'.format(stopped / num_trials))
528 +
            if stopped > 0:
529 +
                print('Average Rounds in Stopped Trials: {:.2f}'.format(sum(rounds_stopped) / stopped))
530 +
531 +
        if hist:
532 +
            histogram(rounds_stopped, 'Rounds reached in stopped trials.')
533 +
534 +
        # Find stopping probability for each round
535 +
        sprob_by_round = [0]*self.max_rounds
536 +
        stopped_by_round = [0]*self.max_rounds
537 +
        remaining_by_round = [0]*(self.max_rounds+1)
538 +
        remaining_by_round[0] = num_trials  # first round has all trials remaining
        for rnd in range(1, self.max_rounds + 1):
            stopped_this_round = rounds_stopped.count(rnd)
            stopped_by_round[rnd - 1] = stopped_this_round
            if remaining_by_round[rnd - 1] != 0:
                sprob_by_round[rnd - 1] = stopped_this_round / remaining_by_round[rnd - 1]
            else:
                sprob_by_round[rnd - 1] = -1
            remaining_by_round[rnd] = remaining_by_round[rnd - 1] - stopped_this_round
547 +
548 +
        analysis = { 
549 +
            'sprob': stopped / num_trials,
550 +
            'sprob_by_round': sprob_by_round,
551 +
            'remaining_by_round': remaining_by_round,
552 +
            'stopped_by_round': stopped_by_round
553 +
        }
554 +
555 +
        # Update simulation entry to include analysis
556 +
        if self.db_mode:
557 +
            self.db.update_analysis(self.sim_id, analysis)
558 +
559 +
        return analysis
560 +
561 +
562 +
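The per-round figures computed in analyze() above are conditional: sprob_by_round[k] is the fraction of trials that stop in round k+1 among the trials still running when that round begins. A small worked example with hypothetical counts:

# 1000 trials, max_rounds = 3; 600, 200 and 100 trials stop in rounds 1, 2 and 3.
rounds_stopped = [1] * 600 + [2] * 200 + [3] * 100
num_trials, max_rounds = 1000, 3
remaining = num_trials
for rnd in range(1, max_rounds + 1):
    stopped_this_round = rounds_stopped.count(rnd)
    print(rnd, stopped_this_round / remaining)  # 1: 0.6, 2: 200/400 = 0.5, 3: 100/200 = 0.5
    remaining -= stopped_this_round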
class MinervaMultiRoundRisk(Simulation):
563 +
    """Simulate a multi-round Minerva audit.
564 +
565 +
    If sample_sprob is provided, sample sizes to achieve a sample_sprob
566 +
    probability of stopping will be computed and used. Otherwise,
567 +
    the initial sample size, x, is given as input and further sample sizes are
568 +
    an additional (sample_mult) * x ballots.
569 +
    The audit executes until it stops or reaches the maximum number of rounds.
570 +
    """
571 +
    sample_sprob: float
572 +
    sample_size: int
573 +
    sample_mult: float
574 +
    max_rounds: int
575 +
    total_relevant_ballots: int
576 +
    vote_dist: List[Tuple[str, int]]
577 +
    audit: Minerva
578 +
579 +
    def __init__(self,
580 +
                 alpha,
581 +
                 reported,
582 +
                 max_rounds,
583 +
                 sample_size=None,
584 +
                 sample_mult=None,
585 +
                 sample_sprob=None,
586 +
                 db_mode=True,
587 +
                 db_host='localhost',
588 +
                 db_name='r2b2',
589 +
                 db_port=27017,
590 +
                 user='writer',
591 +
                 pwd='icanwrite',
592 +
                 *args,
593 +
                 **kwargs):
594 +
        # Add parameters to simulation DB entry
595 +
        if 'sim_args' in kwargs:
596 +
            kwargs['sim_args']['max_rounds'] = max_rounds
597 +
            kwargs['sim_args']['sample_mult'] = sample_mult
598 +
            kwargs['sim_args']['sample_sprob'] = sample_sprob
599 +
        else:
600 +
            kwargs['sim_args'] = {'max_rounds': max_rounds, 'sample_mult': sample_mult, 'sample_sprob': sample_sprob}
601 +
        super().__init__('minerva', alpha, reported, 'tie', True, db_mode, db_host, db_port, db_name, user, pwd, *args, **kwargs)
602 +
        self.sample_size = sample_size
603 +
        self.sample_mult = sample_mult
604 +
        self.sample_sprob = sample_sprob
605 +
        self.max_rounds = max_rounds
606 +
        self.total_relevant_ballots = sum(self.reported.tally.values())
607 +
        # FIXME: temporary until pairwise contest fix is implemented
608 +
        self.contest_ballots = self.reported.contest_ballots
609 +
        #self.reported.contest_ballots = self.total_relevant_ballots
610 +
        #self.reported.winner_prop = self.reported.tally[self.reported.reported_winners[0]] / self.reported.contest_ballots
611 +
        self.audit = Minerva(self.alpha, 1.0, self.reported)
612 +
613 +
        if sample_sprob is None and sample_size is None and sample_mult is None: 
614 +
            raise ValueError('Sample sizes cannot be chosen without sample_sprob or sample_size and sample_mult.')
615 +
        if sample_sprob is not None:
616 +
            if not sample_sprob > 0 or not sample_sprob < 1:
617 +
                raise ValueError('Sample size stopping probability is not between 0 and 1.')
618 +
        else:
619 +
            min_sample_size = 0
620 +
            for pairwise_audit in self.audit.sub_audits.values():
621 +
                min_sample_size = max(pairwise_audit.min_sample_size, min_sample_size)
622 +
            if sample_size < min_sample_size:
623 +
                raise ValueError('Sample size is less than minimum sample size for audit.')
624 +
        if max_rounds < 2:
625 +
            raise ValueError('Maximum rounds is too small.')
626 +
627 +
        # FIXME: sorted candidate list will be created by new branch, update once merged
628 +
        # Generate a sorted underlying vote distribution for a tied election
629 +
        sorted_tally = sorted(self.reported.tally.items(), key=lambda x: x[1], reverse=True)
630 +
        self.vote_dist = [(sorted_tally[0][0], self.total_relevant_ballots // 2)]
631 +
        for i in range(1, len(sorted_tally)):
632 +
            self.vote_dist.append((sorted_tally[i][0], self.total_relevant_ballots))
633 +
        self.vote_dist.append(('invalid', self.contest_ballots))
634 +
635 +
    def trial(self, seed):
636 +
        """Execute a multiround minerva audit (using r2b2.minerva.Minerva)"""
637 +
638 +
        r.seed(seed)
639 +
640 +
        # Ensure audit is reset
641 +
        self.audit._reset()
642 +
643 +
        # Initialize first round including initial sample size
644 +
        round_num = 1
645 +
        previous_sample_size = 0
646 +
        if self.sample_sprob is None:
647 +
            current_sample_size = self.sample_size
648 +
            next_sample = math.ceil(self.sample_mult * self.sample_size)
649 +
        stop = False
650 +
651 +
        # For each round
652 +
        sample = [0 for i in range(len(self.vote_dist))]
653 +
        while round_num <= self.max_rounds:
654 +
            if self.sample_sprob is not None:
655 +
                current_sample_size = self.audit.next_sample_size(self.sample_sprob)
656 +
            # Draw a sample of a given size
657 +
            for i in range(current_sample_size - previous_sample_size):
658 +
                ballot = r.randint(1, self.contest_ballots)
659 +
                for j in range(len(sample)):
660 +
                    if ballot <= self.vote_dist[j][1]:
661 +
                        sample[j] += 1
662 +
                        break
663 +
664 +
            # Convert this sample to a dict
665 +
            sample_dict = {}
666 +
            for i in range(len(self.vote_dist)):
667 +
                # For now, we will ignore the irrelevant ballots
668 +
                if not self.vote_dist[i][0] == 'invalid':
669 +
                    sample_dict[self.vote_dist[i][0]] = sample[i]
670 +
671 +
            relevant_sample_size = current_sample_size - sample[-1]
672 +
673 +
            # Execute a round of the audit for this sample
674 +
            stop = self.audit.execute_round(current_sample_size, sample_dict)
675 +
676 +
            # If audit is done, return trial output
677 +
            # FIXME: Improve output format
678 +
            if stop:
679 +
                return {
680 +
                    'stop': stop,
681 +
                    'round': round_num,
682 +
                    'p_value_sched': self.audit.pvalue_schedule,
683 +
                    'p_value': self.audit.get_risk_level(),
684 +
                    'relevant_sample_size_sched': self.audit.rounds,
685 +
                    'winner_ballots_drawn_sched': self.audit.sample_ballots,
686 +
                    #'kmin_sched': self.audit.min_winner_ballots
687 +
                }
688 +
689 +
            # Else choose a next round size and continue
690 +
            round_num += 1
691 +
            previous_sample_size = current_sample_size
692 +
            if self.sample_sprob is None:
693 +
                current_sample_size += next_sample
694 +
                next_sample = math.ceil(self.sample_mult * self.sample_size)
695 +
696 +
        # If audit does not stop, return trial output
697 +
        # FIXME: Improve output format
698 +
        return {
699 +
            'stop': stop,
700 +
            'round': self.max_rounds,
701 +
            'p_value_sched': self.audit.pvalue_schedule,
702 +
            'p_value': self.audit.get_risk_level(),
703 +
            'relevant_sample_size_sched': self.audit.rounds,
704 +
            'winner_ballots_drawn_sched': self.audit.sample_ballots,
705 +
            #'kmin_sched': self.audit.min_winner_ballots
706 +
        }
707 +
708 +
    def analyze(self, verbose: bool = False, hist: bool = False):
709 +
        """Analyze trials to get experimental risk.
710 +
711 +
        Args:
712 +
            verbose (bool): If true, analyze will print simulation analysis information.
713 +
            hist (bool): If true, analyze will generate and display a histogram of
                the rounds in which trials stopped.
715 +
        """
716 +
        if self.db_mode:
717 +
            trials = self.db.trial_lookup(self.sim_id)
718 +
        else:
719 +
            trials = self.trials
720 +
        num_trials = 0
721 +
        stopped = 0
722 +
        rounds_stopped = []
723 +
        # TODO: Create additional structures to store trial data
724 +
725 +
        for trial in trials:
726 +
            num_trials += 1
727 +
            if trial['stop']:
728 +
                stopped += 1
729 +
                rounds_stopped.append(trial['round'])
730 +
            # TODO: Extract more data from trial
731 +
732 +
        if verbose:
733 +
            print('Analysis\n========\n')
734 +
            print('Number of trials: {}'.format(num_trials))
735 +
            print('Experimental Risk: {:.5f}'.format(stopped / num_trials))
736 +
            if stopped > 0:
737 +
                print('Average Rounds in Stopped Trials: {:.2f}'.format(sum(rounds_stopped) / stopped))
738 +
739 +
        if hist:
740 +
            histogram(rounds_stopped, 'Rounds reached in stopped trials.')
741 +
742 +
        # Find risk for each round
743 +
        risk_by_round = [0]*self.max_rounds
744 +
        stopped_by_round = [0]*self.max_rounds
745 +
        remaining_by_round = [0]*(self.max_rounds+1)
746 +
        remaining_by_round[0] = num_trials  # first round has all trials remaining
        for rnd in range(1, self.max_rounds + 1):
            stopped_this_round = rounds_stopped.count(rnd)
            stopped_by_round[rnd - 1] = stopped_this_round
            if remaining_by_round[rnd - 1] != 0:
                risk_by_round[rnd - 1] = stopped_this_round / remaining_by_round[rnd - 1]
            else:
                risk_by_round[rnd - 1] = -1
            remaining_by_round[rnd] = remaining_by_round[rnd - 1] - stopped_this_round
755 +
756 +
        analysis = { 
757 +
            'risk': stopped / num_trials,
758 +
            'risk_by_round': risk_by_round,
759 +
            'remaining_by_round': remaining_by_round,
760 +
            'stopped_by_round': stopped_by_round
761 +
        }
762 +
763 +
        # Update simulation entry to include analysis
764 +
        if self.db_mode:
765 +
            self.db.update_analysis(self.sim_id, analysis)
766 +
767 +
        return analysis
768 +
769 +
770 +
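When sample_sprob is not supplied, the multi-round classes above grow the sample deterministically: the first round uses sample_size and every later round adds ceil(sample_mult * sample_size) ballots. A sketch of the resulting cumulative schedule with hypothetical parameters:

import math

sample_size, sample_mult, max_rounds = 100, 0.5, 4         # hypothetical values
schedule = [sample_size]
for _ in range(max_rounds - 1):
    schedule.append(schedule[-1] + math.ceil(sample_mult * sample_size))
# schedule == [100, 150, 200, 250]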
class MinervaRandomMultiRoundRisk(Simulation):
771 +
    """Simulate a multi-round Minerva audit for random subsequent sample sizes.
772 +
773 +
    The initial sample size, x, is given as input and further sample sizes are
774 +
    chosen randomly as an additional 0.5x to 1.5x ballots in the next round.
775 +
    The audit executes until it stops or reaches the maximum number of rounds.
776 +
    """
777 +
    sample_size: int
778 +
    max_rounds: int
779 +
    total_relevant_ballots: int
780 +
    vote_dist: List[Tuple[str, int]]
781 +
    audit: Minerva
782 +
783 +
    def __init__(self,
784 +
                 alpha,
785 +
                 reported,
786 +
                 sample_size,
787 +
                 max_rounds,
788 +
                 db_mode=True,
789 +
                 db_host='localhost',
790 +
                 db_name='r2b2',
791 +
                 db_port=27017,
792 +
                 user='writer',
793 +
                 pwd='icanwrite',
794 +
                 *args,
795 +
                 **kwargs):
796 +
        if 'sim_args' in kwargs:
797 +
            kwargs['sim_args']['max_rounds'] = max_rounds
798 +
        else:
799 +
            kwargs['sim_args'] = {'max_rounds': max_rounds}
800 +
        super().__init__('minerva', alpha, reported, 'tie', True, db_mode, db_host, db_port, db_name, user, pwd, *args, **kwargs)
801 +
        self.sample_size = sample_size
802 +
        self.max_rounds = max_rounds
803 +
        self.total_relevant_ballots = sum(self.reported.tally.values())
804 +
        # FIXME: temporary until pairwise contest fix is implemented
805 +
        self.contest_ballots = self.reported.contest_ballots
806 +
        self.reported.contest_ballots = self.total_relevant_ballots
807 +
        self.reported.winner_prop = self.reported.tally[self.reported.reported_winners[0]] / self.reported.contest_ballots
808 +
        self.audit = Minerva(self.alpha, 1.0, self.reported)
809 +
810 +
        if sample_size < self.audit.min_sample_size:
811 +
            raise ValueError('Sample size is less than minimum sample size for audit.')
812 +
        if max_rounds < 2:
813 +
            raise ValueError('Maximum rounds is too small.')
814 +
815 +
        # FIXME: sorted candidate list will be created by new branch, update once merged
816 +
        # Generate a sorted underlying vote distribution for a tied election
817 +
        sorted_tally = sorted(self.reported.tally.items(), key=lambda x: x[1], reverse=True)
818 +
        self.vote_dist = [(sorted_tally[0][0], self.total_relevant_ballots // 2)]
819 +
        for i in range(1, len(sorted_tally)):
820 +
            self.vote_dist.append((sorted_tally[i][0], self.total_relevant_ballots))
821 +
        self.vote_dist.append(('invalid', self.contest_ballots))
822 +
823 +
    def trial(self, seed):
824 +
        """Execute a 1-round minerva audit (using r2b2.minerva.Minerva)"""
825 +
826 +
        r.seed(seed)
827 +
828 +
        # Ensure audit is reset
829 +
        self.audit._reset()
830 +
831 +
        # Initialize first round with given initial sample size
832 +
        round_num = 1
833 +
        previous_sample_size = 0
834 +
        current_sample_size = self.sample_size
835 +
        stop = False
836 +
837 +
        # For each round
838 +
        sample = [0 for i in range(len(self.vote_dist))]
839 +
        while round_num <= self.max_rounds:
840 +
            # Draw a sample of a given size
841 +
            for i in range(current_sample_size - previous_sample_size):
842 +
                ballot = r.randint(1, self.contest_ballots)
843 +
                for j in range(len(sample)):
844 +
                    if ballot <= self.vote_dist[j][1]:
845 +
                        sample[j] += 1
846 +
                        break
847 +
848 +
            relevant_sample_size = current_sample_size - sample[-1]
849 +
850 +
            # Perform audit computations
851 +
            self.audit.rounds.append(relevant_sample_size)
852 +
            self.audit.current_dist_null()
853 +
            self.audit.current_dist_reported()
854 +
            # Check if the audit has completed
855 +
            if (self.audit.stopping_condition(sample[0])):
856 +
                stop = True
857 +
            # Continue audit computations
858 +
            kmin = self.audit.next_min_winner_ballots(relevant_sample_size)
859 +
            self.audit.min_winner_ballots.append(kmin)
860 +
            self.audit.truncate_dist_null()
861 +
            self.audit.truncate_dist_reported()
862 +
            self.audit.sample_winner_ballots.append(sample[0])
863 +
864 +
            # If audit is done, return trial output
865 +
            # FIXME: Improve output format
866 +
            if stop:
867 +
                return {
868 +
                    'stop': stop,
869 +
                    'round': round_num,
870 +
                    'p_value_sched': self.audit.pvalue_schedule,
871 +
                    'p_value': self.audit.get_risk_level(),
872 +
                    'relevant_sample_size_sched': self.audit.rounds,
873 +
                    'winner_ballots_drawn_sched': self.audit.sample_winner_ballots,
874 +
                    #'kmin_sched': self.audit.min_winner_ballots
875 +
                }
876 +
877 +
            # Else choose a next round size and continue
878 +
            round_num += 1
879 +
            sample_mult = r.uniform(0.5, 1.5)
880 +
            next_sample = math.ceil(self.sample_size * sample_mult)
881 +
            previous_sample_size = current_sample_size
882 +
            current_sample_size += next_sample
883 +
884 +
        # If audit does not stop, return trial output
885 +
        # FIXME: Improve output format
886 +
        return {
887 +
            'stop': stop,
888 +
            'round': self.max_rounds,
889 +
            'p_value_sched': self.audit.pvalue_schedule,
890 +
            'p_value': self.audit.get_risk_level(),
891 +
            'relevant_sample_size_sched': self.audit.rounds,
892 +
            'winner_ballots_drawn_sched': self.audit.sample_winner_ballots,
893 +
            #'kmin_sched': self.audit.min_winner_ballots
894 +
        }
895 +
896 +
    def analyze(self, verbose: bool = False, hist: bool = False):
897 +
        """Analyze trials to get experimental risk.
898 +
899 +
        Args:
900 +
            verbose (bool): If true, analyze will print simulation analysis information.
901 +
            hist (bool): If true, analyze will generate and display a histogram of
                the rounds in which trials stopped.
903 +
        """
904 +
        if self.db_mode:
905 +
            trials = self.db.trial_lookup(self.sim_id)
906 +
        else:
907 +
            trials = self.trials
908 +
        num_trials = 0
909 +
        stopped = 0
910 +
        rounds_stopped = []
911 +
        # TODO: Create additional structures to store trial data
912 +
913 +
        for trial in trials:
914 +
            num_trials += 1
915 +
            if trial['stop']:
916 +
                stopped += 1
917 +
                rounds_stopped.append(trial['round'])
918 +
            # TODO: Extract more data from trial
919 +
920 +
        if verbose:
921 +
            print('Analysis\n========\n')
922 +
            print('Number of trials: {}'.format(num_trials))
923 +
            print('Experimental Risk: {:.5f}'.format(stopped / num_trials))
924 +
            if stopped > 0:
925 +
                print('Average Rounds in Stopped Trials: {:.2f}'.format(sum(rounds_stopped) / stopped))
926 +
927 +
        if hist:
928 +
            histogram(rounds_stopped, 'Rounds reached in stopped trials.')
929 +
930 +
        # Update simulation entry to include analysis
931 +
        if self.db_mode:
932 +
            self.db.update_analysis(self.sim_id, (stopped / num_trials))
933 +
        return stopped / num_trials
934 +
935 +
936 +
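The Random* classes differ only in how the next round grows: each later round adds ceil(u * sample_size) ballots with u drawn uniformly from [0.5, 1.5], so round sizes increase by roughly half to one-and-a-half times the initial sample. A sketch of that schedule (hypothetical parameters; the exact values depend on the seed):

import math
import random as r

r.seed(2021)                                 # hypothetical seed
sample_size, max_rounds = 100, 4
schedule = [sample_size]
for _ in range(max_rounds - 1):
    schedule.append(schedule[-1] + math.ceil(r.uniform(0.5, 1.5) * sample_size))
# each step adds between 50 and 150 ballots for sample_size = 100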
class MinervaRandomMultiRoundStoppingProb(Simulation):
937 +
    """Simulate a multi-round Minerva audit for random subsequent sample sizes.
938 +
939 +
    The initial sample size, x, is given as input and further sample sizes are
940 +
    chosen randomly as an additional 0.5x to 1.5x ballots in the next round.
941 +
    The audit executes until it stops or reaches the maximum number of rounds.
942 +
    """
943 +
    sample_size: int
944 +
    max_rounds: int
945 +
    total_relevant_ballots: int
946 +
    vote_dist: List[Tuple[str, int]]
947 +
    audit: Minerva
948 +
949 +
    def __init__(self,
950 +
                 alpha,
951 +
                 reported,
952 +
                 sample_size,
953 +
                 max_rounds,
954 +
                 db_mode=True,
955 +
                 db_host='localhost',
956 +
                 db_name='r2b2',
957 +
                 db_port=27017,
958 +
                 user='writer',
959 +
                 pwd='icanwrite',
960 +
                 *args,
961 +
                 **kwargs):
962 +
        if 'sim_args' in kwargs:
963 +
            kwargs['sim_args']['max_rounds'] = max_rounds
964 +
        else:
965 +
            kwargs['sim_args'] = {'max_rounds': max_rounds}
966 +
        super().__init__('minerva', alpha, reported, 'reported', True, db_mode, db_host, db_port, db_name, user, pwd, *args, **kwargs)
967 +
        self.sample_size = sample_size
968 +
        self.max_rounds = max_rounds
969 +
        self.total_relevant_ballots = sum(self.reported.tally.values())
970 +
        # FIXME: temporary until pairwise contest fix is implemented
971 +
        self.contest_ballots = self.reported.contest_ballots
972 +
        self.reported.contest_ballots = self.total_relevant_ballots
973 +
        self.reported.winner_prop = self.reported.tally[self.reported.reported_winners[0]] / self.reported.contest_ballots
974 +
        self.audit = Minerva(self.alpha, 1.0, self.reported)
975 +
976 +
        if sample_size < self.audit.min_sample_size:
977 +
            raise ValueError('Sample size is less than minimum sample size for audit.')
978 +
        if max_rounds < 2:
979 +
            raise ValueError('Maximum rounds is too small.')
980 +
981 +
        # FIXME: sorted candidate list will be created by new branch, update once merged
982 +
        # Generate a sorted underlying vote distribution from the reported tally
983 +
        sorted_tally = sorted(self.reported.tally.items(), key=lambda x: x[1], reverse=True)
984 +
        self.vote_dist = [(sorted_tally[0][0], sorted_tally[0][1])]
985 +
        current = sorted_tally[0][1]
986 +
        for i in range(1, len(sorted_tally)):
987 +
            current += sorted_tally[i][1]
988 +
            self.vote_dist.append((sorted_tally[i][0], current))
989 +
        self.vote_dist.append(('invalid', self.contest_ballots))
990 +
991 +
    def trial(self, seed):
992 +
        """Execute a 1-round minerva audit (using r2b2.minerva.Minerva)"""
993 +
994 +
        r.seed(seed)
995 +
996 +
        # Ensure audit is reset
997 +
        self.audit._reset()
998 +
999 +
        # Initialize first round with given initial sample size
1000 +
        round_num = 1
1001 +
        previous_sample_size = 0
1002 +
        current_sample_size = self.sample_size
1003 +
        stop = False
1004 +
1005 +
        # For each round
1006 +
        sample = [0 for i in range(len(self.vote_dist))]
1007 +
        while round_num <= self.max_rounds:
1008 +
            # Draw a sample of a given size
1009 +
            for i in range(current_sample_size - previous_sample_size):
1010 +
                ballot = r.randint(1, self.contest_ballots)
1011 +
                for j in range(len(sample)):
1012 +
                    if ballot <= self.vote_dist[j][1]:
1013 +
                        sample[j] += 1
1014 +
                        break
1015 +
1016 +
            relevant_sample_size = current_sample_size - sample[-1]
1017 +
1018 +
            # Perform audit computations
1019 +
            self.audit.rounds.append(relevant_sample_size)
1020 +
            self.audit.current_dist_null()
1021 +
            self.audit.current_dist_reported()
1022 +
            # Check if the audit has completed
1023 +
            if (self.audit.stopping_condition(sample[0])):
1024 +
                stop = True
1025 +
            # Continue audit computations
1026 +
            kmin = self.audit.next_min_winner_ballots(relevant_sample_size)
1027 +
            self.audit.min_winner_ballots.append(kmin)
1028 +
            self.audit.truncate_dist_null()
1029 +
            self.audit.truncate_dist_reported()
1030 +
            self.audit.sample_winner_ballots.append(sample[0])
1031 +
1032 +
            # If audit is done, return trial output
1033 +
            # FIXME: Improve output format
1034 +
            if stop:
1035 +
                return {
1036 +
                    'stop': stop,
1037 +
                    'round': round_num,
1038 +
                    'p_value_sched': self.audit.pvalue_schedule,
1039 +
                    'p_value': self.audit.get_risk_level(),
1040 +
                    'relevant_sample_size_sched': self.audit.rounds,
1041 +
                    'winner_ballots_drawn_sched': self.audit.sample_winner_ballots,
1042 +
                    'kmin_sched': self.audit.min_winner_ballots
1043 +
                }
1044 +
1045 +
            # Else choose a next round size and continue
1046 +
            round_num += 1
1047 +
            sample_mult = r.uniform(0.5, 1.5)
1048 +
            next_sample = math.ceil(self.sample_size * sample_mult)
1049 +
            previous_sample_size = current_sample_size
1050 +
            current_sample_size += next_sample
1051 +
1052 +
        # If audit does not stop, return trial output
1053 +
        # FIXME: Improve output format
1054 +
        return {
1055 +
            'stop': stop,
1056 +
            'round': self.max_rounds,
1057 +
            'p_value_sched': self.audit.pvalue_schedule,
1058 +
            'p_value': self.audit.get_risk_level(),
1059 +
            'relevant_sample_size_sched': self.audit.rounds,
1060 +
            'winner_ballots_drawn_sched': self.audit.sample_winner_ballots,
1061 +
            'kmin_sched': self.audit.min_winner_ballots
1062 +
        }
1063 +
1064 +
    def analyze(self, verbose: bool = False, hist: bool = False):
1065 +
        """Analyze trials to get experimental risk.
1066 +
1067 +
        Args:
1068 +
            verbose (bool): If true, analyze will print simulation analysis information.
1069 +
            hist (bool): If true, analyze will generate and display a histogram of
                the rounds in which trials stopped.
1071 +
        """
1072 +
        if self.db_mode:
1073 +
            trials = self.db.trial_lookup(self.sim_id)
1074 +
        else:
1075 +
            trials = self.trials
1076 +
        num_trials = 0
1077 +
        stopped = 0
1078 +
        rounds_stopped = []
1079 +
        # TODO: Create additional structures to store trial data
1080 +
1081 +
        for trial in trials:
1082 +
            num_trials += 1
1083 +
            if trial['stop']:
1084 +
                stopped += 1
1085 +
                rounds_stopped.append(trial['round'])
1086 +
            # TODO: Extract more data from trial
1087 +
1088 +
        if verbose:
1089 +
            print('Analysis\n========\n')
1090 +
            print('Number of trials: {}'.format(num_trials))
1091 +
            print('Stopping Probability: {:%}'.format(stopped / num_trials))
1092 +
            if stopped > 0:
1093 +
                print('Average Rounds in Stopped Trials: {:.2f}'.format(sum(rounds_stopped) / stopped))
1094 +
1095 +
        if hist:
1096 +
            histogram(rounds_stopped, 'Rounds reached in stopped trials.')
1097 +
1098 +
        # Update simulation entry to include analysis
1099 +
        if self.db_mode:
1100 +
            self.db.update_analysis(self.sim_id, (stopped / num_trials))
1101 +
        return stopped / num_trials
1102 +
1103 +
1104 +
class MinervaMultiRoundAlteredMargin(Simulation):
1105 +
    """Simulate a Minerva audit for a given sample size with a correct outcome but incorrect reported margin"""
1106 +
    underlying_margin: float
1107 +
    sample_size: int
1108 +
    max_rounds: int
1109 +
    total_relevant_ballots: int
1110 +
    vote_dist: List[Tuple[str, int]]
1111 +
    audit: Minerva
1112 +
1113 +
    def __init__(self,
1114 +
                 alpha,
1115 +
                 reported,
1116 +
                 underlying,
1117 +
                 underlying_margin,
1118 +
                 sample_size,
1119 +
                 max_rounds,
1120 +
                 db_mode=True,
1121 +
                 db_host='localhost',
1122 +
                 db_name='r2b2',
1123 +
                 db_port=27017,
1124 +
                 user='writer',
1125 +
                 pwd='icanwrite',
1126 +
                 *args,
1127 +
                 **kwargs):
1128 +
        if 'sim_args' in kwargs:
1129 +
            kwargs['sim_args']['max_rounds'] = max_rounds
1130 +
        else:
1131 +
            kwargs['sim_args'] = {'max_rounds': max_rounds}
1132 +
        super().__init__('minerva', alpha, reported, {
1133 +
            'change': underlying,
1134 +
            'margin': underlying_margin
1135 +
        }, True, db_mode, db_host, db_port, db_name, user, pwd, *args, **kwargs)
1136 +
        self.underlying_margin = underlying_margin
1137 +
        self.sample_size = sample_size
1138 +
        self.max_rounds = max_rounds
1139 +
        self.total_relevant_ballots = sum(self.reported.tally.values())
1140 +
        # FIXME: temporary until pairwise contest fix is implemented
1141 +
        self.contest_ballots = self.reported.contest_ballots
1142 +
        self.reported.contest_ballots = self.total_relevant_ballots
1143 +
        self.reported.winner_prop = self.reported.tally[self.reported.reported_winners[0]] / self.reported.contest_ballots
1144 +
        self.audit = Minerva(self.alpha, 1.0, self.reported)
1145 +
1146 +
        if sample_size < self.audit.min_sample_size:
1147 +
            raise ValueError('Sample size is less than minimum sample size for audit')
1148 +
1149 +
        # FIXME: sorted candidate list will be created by new branch, update once merged
1150 +
        # Generate a sorted underlying vote distribution
1151 +
        sorted_tally = sorted(self.reported.tally.items(), key=lambda x: x[1], reverse=True)
1152 +
        underlying_winner_prop = (1.0 + underlying_margin) / 2.0
1153 +
        self.vote_dist = [(sorted_tally[0][0], self.total_relevant_ballots * underlying_winner_prop)]
1154 +
        for i in range(1, len(sorted_tally)):
1155 +
            self.vote_dist.append((sorted_tally[i][0], self.total_relevant_ballots))
1156 +
        self.vote_dist.append(('invalid', self.contest_ballots))
1157 +
1158 +
    def trial(self, seed):
1159 +
        """Execute a multiround minerva audit."""
1160 +
1161 +
        r.seed(seed)
1162 +
1163 +
        # Ensure audit is reset
1164 +
        self.audit._reset()
1165 +
1166 +
        # Initialize first round with given initial sample size
1167 +
        round_num = 1
1168 +
        previous_sample_size = 0
1169 +
        current_sample_size = self.sample_size
1170 +
        stop = False
1171 +
1172 +
        # Draw a sample
1173 +
        sample = [0 for i in range(len(self.vote_dist))]
1174 +
        while round_num <= self.max_rounds:
1175 +
            # Draw a sample for current size
1176 +
            for i in range(current_sample_size - previous_sample_size):
1177 +
                ballot = r.randint(1, self.contest_ballots)
1178 +
                for j in range(len(sample)):
1179 +
                    if ballot <= self.vote_dist[j][1]:
1180 +
                        sample[j] += 1
1181 +
                        break
1182 +
            relevant_sample_size = current_sample_size - sample[-1]
1183 +
1184 +
            # Perform audit computations
1185 +
            self.audit.rounds.append(relevant_sample_size)
1186 +
            self.audit.current_dist_null()
1187 +
            self.audit.current_dist_reported()
1188 +
            # Check if audit is completed
1189 +
            if self.audit.stopping_condition(sample[0]):
1190 +
                stop = True
1191 +
            # Continue audit computations
1192 +
            kmin = self.audit.next_min_winner_ballots(relevant_sample_size)
1193 +
            self.audit.min_winner_ballots.append(kmin)
1194 +
            self.audit.truncate_dist_null()
1195 +
            self.audit.truncate_dist_reported()
1196 +
            self.audit.sample_winner_ballots.append(sample[0])
1197 +
1198 +
            # If audit is done, return trial output
1199 +
            if stop:
1200 +
                return {
1201 +
                    'stop': stop,
1202 +
                    'round': round_num,
1203 +
                    'p_value_sched': self.audit.pvalue_schedule,
1204 +
                    'p_value': self.audit.get_risk_level(),
1205 +
                    'relevant_sample_size_sched': self.audit.rounds,
1206 +
                    'winner_ballots_drawn_sched': self.audit.sample_winner_ballots,
1207 +
                    'kmin_sched': self.audit.min_winner_ballots
1208 +
                }
1209 +
1210 +
            # Else choose a next round size and continue
1211 +
            round_num += 1
1212 +
            sample_mult = r.uniform(0.5, 1.5)
1213 +
            next_sample = math.ceil(self.sample_size * sample_mult)
1214 +
            previous_sample_size = current_sample_size
1215 +
            current_sample_size += next_sample
1216 +
1217 +
        # If audit does not stop, return trial output
1218 +
        return {
1219 +
            'stop': stop,
1220 +
            'round': self.max_rounds,
1221 +
            'p_value_sched': self.audit.pvalue_schedule,
1222 +
            'p_value': self.audit.get_risk_level(),
1223 +
            'relevant_sample_size_sched': self.audit.rounds,
1224 +
            'winner_ballots_drawn_sched': self.audit.sample_winner_ballots,
1225 +
            'kmin_sched': self.audit.min_winner_ballots
1226 +
        }
1227 +
1228 +
    def analyze(self, verbose: bool = False, hist: bool = False):
1229 +
        """Analyse trials to get experimental stopping probability"""
1230 +
        if self.db_mode:
1231 +
            trials = self.db.trial_lookup(self.sim_id)
1232 +
        else:
1233 +
            trials = self.trials
1234 +
        num_trials = 0
1235 +
        stopped = 0
1236 +
        winner_ballot_dist = []
1237 +
        total_risk = 0.0
1238 +
1239 +
        for trial in trials:
1240 +
            num_trials += 1
1241 +
            total_risk += trial['p_value']
1242 +
            if trial['stop']:
1243 +
                stopped += 1
1244 +
                winner_ballot_dist.append(trial['winner_ballots_drawn_sched'][-1])  # trial dict stores a schedule, not a single 'winner_ballots' value
1245 +
1246 +
        # TODO: insert verbose and histograms
1247 +
1248 +
        # Compute analysis; update the simulation entry when running in db_mode
1249 +
        analysis = {'avg_p_value': (total_risk / num_trials), 'sprob': (stopped / num_trials)}
1250 +
        if self.db_mode:
1251 +
            self.db.update_analysis(self.sim_id, analysis)
1252 +
1253 +
        return analysis
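
The trial() method above draws each ballot by comparing a uniform index against the cumulative thresholds stored in vote_dist: anything at or below the winner's threshold counts for the winner, anything else at or below the relevant-ballot total goes to the first candidate whose threshold covers it, and the remainder is invalid. A minimal standalone sketch of that pattern; the helper name draw_sample and the numbers are illustrative, not part of r2b2:

import random

def draw_sample(thresholds, contest_ballots, sample_size, seed=0):
    # thresholds: ordered list of (label, cumulative upper bound), like vote_dist above
    random.seed(seed)
    counts = [0] * len(thresholds)
    for _ in range(sample_size):
        ballot = random.randint(1, contest_ballots)
        for j, (_, bound) in enumerate(thresholds):
            if ballot <= bound:
                counts[j] += 1
                break
    return counts

# Illustrative numbers: 10,000 relevant ballots, 55% underlying winner share, 1,000 invalid.
print(draw_sample([('winner', 5500), ('loser', 10000), ('invalid', 11000)], 11000, 100))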

@@ -0,0 +1,119 @@
1 +
"""Athena/Minerva Simulations using Filip Zagorski's athena library."""
2 +
3 +
import random as r
4 +
from typing import List
5 +
from typing import Tuple
6 +
7 +
from athena.audit import Audit
8 +
9 +
from r2b2.simulator import Simulation
10 +
from r2b2.simulator import histogram
11 +
12 +
13 +
class FZMinervaOneRoundRisk(Simulation):
14 +
    """Simulate a 1-round Minerva audit for a given sample size to compute risk limit."""
15 +
16 +
    sample_size: int
17 +
    total_relevant_ballots: int
18 +
    vote_dist: List[Tuple[str, int]]
19 +
    election_file: str
20 +
    reported_name: str
21 +
22 +
    def __init__(self,
23 +
                 alpha,
24 +
                 reported,
25 +
                 sample_size,
26 +
                 election_file,
27 +
                 reported_name,
28 +
                 db_mode=True,
29 +
                 db_host='localhost',
30 +
                 db_name='r2b2',
31 +
                 db_port=27017,
32 +
                 *args,
33 +
                 **kwargs):
34 +
        super().__init__('fz_minerva', alpha, reported, 'tie', True, db_mode, db_host, db_port, db_name, *args, **kwargs)
35 +
        self.sample_size = sample_size
36 +
        self.total_relevant_ballots = sum(self.reported.tally.values())
37 +
38 +
        # Generate underlying vote distribution associated with a tie
39 +
        sorted_tally = sorted(self.reported.tally.items(), key=lambda x: x[1], reverse=True)
40 +
        self.vote_dist = [(sorted_tally[0][0], self.total_relevant_ballots // 2)]
41 +
        for i in range(1, len(sorted_tally)):
42 +
            self.vote_dist.append((sorted_tally[i][0], self.total_relevant_ballots))
43 +
        self.vote_dist.append(('invalid', self.reported.contest_ballots))
44 +
45 +
        # Store info needed to create an audit for each trial
46 +
        self.election_file = election_file
47 +
        self.reported_name = reported_name
48 +
49 +
    def trial(self, seed):
50 +
        """Execute a 1-round minerva audit from Filip's athena code."""
51 +
52 +
        # Create a clean audit object
53 +
        # FIXME: Ideally, there should be a way to create the audit object once
54 +
        # and reset its state before each trial. Re-reading the election and
55 +
        # contest seems very inefficient...
56 +
        audit = Audit('minerva', self.alpha)
57 +
        audit.read_election_results(self.election_file)
58 +
        audit.load_contest(self.reported_name)
59 +
60 +
        r.seed(seed)
61 +
62 +
        # Draw a sample of given size
63 +
        sample = [0 for i in range(len(self.vote_dist))]
64 +
        for i in range(self.sample_size):
65 +
            ballot = r.randint(1, self.reported.contest_ballots)
66 +
            for j in range(len(sample)):
67 +
                if ballot <= self.vote_dist[j][1]:
68 +
                    sample[j] += 1
69 +
                    break
70 +
71 +
        relevant_sample_size = self.sample_size - sample[-1]
72 +
73 +
        # Perform audit calculations
74 +
        # FIXME: set_observations() will always print, let's not do that
75 +
        audit.set_observations(self.sample_size, relevant_sample_size, sample[:len(sample) - 1])
76 +
        p_value = audit.status[self.reported_name].risks[-1]
77 +
78 +
        if p_value > self.alpha and audit.status[self.reported_name].audit_completed:
79 +
            raise Exception('Risk limit not met, but audit reports completed.')
80 +
        elif p_value <= self.alpha and not audit.status[self.reported_name].audit_completed:
81 +
            raise Exception('Risk limit met, but audit reports not completed.')
82 +
83 +
        return {
84 +
            'stop': audit.status[self.reported_name].audit_completed,
85 +
            'p_value': p_value,
86 +
            'sample_size': self.sample_size,
87 +
            'relevant_sample_size': relevant_sample_size,
88 +
            'winner_ballots': sample[0]
89 +
        }
90 +
91 +
    def analyze(self):
92 +
        if self.db_mode:
93 +
            trials = self.db.trial_lookup(self.sim_id)
94 +
        else:
95 +
            trials = self.trials
96 +
        num_trials = 0
97 +
        stopped = 0
98 +
        total_risk = 0
99 +
        total_relevant_sampled = 0
100 +
        winner_ballot_dist = []
101 +
        risk_dist = []
102 +
103 +
        for trial in trials:
104 +
            num_trials += 1
105 +
            if trial['stop']:
106 +
                stopped += 1
107 +
108 +
            total_relevant_sampled += trial['relevant_sample_size']
109 +
            winner_ballot_dist.append(trial['winner_ballots'])
110 +
            total_risk += trial['p_value']
111 +
            risk_dist.append(trial['p_value'])
112 +
113 +
        print('Analysis\n========')
114 +
        print('Underlying election is tied\n')
115 +
        print('Number of trials: {}'.format(num_trials))
116 +
        print('Number of stopped: {}'.format(stopped))
117 +
        print('Risk: {:%}'.format(stopped / num_trials))
118 +
        histogram(winner_ballot_dist, 'Winner ballots found in sample of size: {}'.format(self.sample_size))
119 +
        histogram(risk_dist, 'Risk (p_value) dist.')
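
Since the underlying election in this simulation is an exact tie, the fraction of trials that stop is an empirical estimate of the audit's risk, which is what the Risk line above reports. A tiny sketch of that tally, assuming trial dicts shaped like the return value of trial():

def empirical_risk(trials):
    # Fraction of trials that stopped under a tied underlying election.
    stopped = sum(1 for t in trials if t['stop'])
    return stopped / len(trials)

print('{:%}'.format(empirical_risk([{'stop': True}, {'stop': False}, {'stop': False}])))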

@@ -0,0 +1,29 @@
1 +
from r2b2.simulation.filip_athena import FZMinervaOneRoundRisk as FZMR
2 +
from r2b2.tests.util import parse_election
3 +
4 +
election_file = 'src/r2b2/tests/data/election_template.json'
5 +
contest_name = 'contest1'
6 +
election = parse_election(election_file)
7 +
contest = election.contests[contest_name]
8 +
9 +
10 +
def test_creation():
11 +
    sim = FZMR(0.1, contest, 10, election_file, contest_name, db_mode=False)
12 +
    assert(len(sim.trials) == 0)
13 +
    assert(sim.alpha == 0.1)
14 +
    assert(sim.sample_size == 10)
15 +
    assert(sim.audit_type == 'fz_minerva')
16 +
    assert(sim.underlying == 'tie')
17 +
    assert(sim.db is None)
18 +
    assert(sim.audit_id is None)
19 +
    assert(sim.reported_id is None)
20 +
    assert(sim.sim_id is None)
21 +
22 +
23 +
def test_one_trial():
24 +
    sim = FZMR(0.1, contest, 10, election_file, contest_name, db_mode=False)
25 +
    sim.run(1)
26 +
    assert(len(sim.trials) == 1)
27 +
    keys = list(sim.trials[0].keys())
28 +
    expected_keys = ['simulation', 'seed', 'stop', 'p_value', 'sample_size', 'relevant_sample_size', 'winner_ballots']
29 +
    assert(keys == expected_keys)
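
The same pattern scales beyond the single-trial unit test; a sketch of a larger local run (trial count, sample size, and risk limit here are arbitrary, and db_mode=False keeps everything in memory):

from r2b2.simulation.filip_athena import FZMinervaOneRoundRisk as FZMR
from r2b2.tests.util import parse_election

election_file = 'src/r2b2/tests/data/election_template.json'
election = parse_election(election_file)
contest = election.contests['contest1']

sim = FZMR(0.1, contest, 50, election_file, 'contest1', db_mode=False)
sim.run(100)   # 100 independent trials
sim.analyze()  # prints trial counts, empirical risk, and histograms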

@@ -112,15 +112,20 @@
112 112
113 113
        # The number of ballots that will be drawn this round.
114 114
        if len(self.rounds) > 0:
115 -
            round_draw = n - self.rounds[-1]
115 +
            winner_ballots = self.sample_ballots[sub_audit.sub_contest.reported_winner][-1]
116 +
            loser_ballots = self.sample_ballots[sub_audit.sub_contest.reported_loser][-1]
117 +
            previous_round = winner_ballots + loser_ballots
118 +
            round_draw = n - previous_round
116 119
        else:
117 120
            round_draw = n
118 121
119 122
        num_dist_round_draw = binom.pmf(range(0, round_draw + 1), round_draw, p1)
120 123
        denom_dist_round_draw = binom.pmf(range(0, round_draw + 1), round_draw, p0)
121 124
        if len(self.rounds) > 0:
122 -
            num_dist = convolve(sub_audit.distribution_reported_tally, num_dist_round_draw, method='fft')
123 -
            denom_dist = convolve(sub_audit.distribution_null, denom_dist_round_draw, method='fft')
125 +
            num_dist = convolve(sub_audit.distribution_reported_tally, num_dist_round_draw, method='direct')
126 +
            denom_dist = convolve(sub_audit.distribution_null, denom_dist_round_draw, method='direct')
127 +
            num_dist = [abs(p) for p in num_dist]
128 +
            denom_dist = [abs(p) for p in denom_dist]
124 129
        else:
125 130
            num_dist = num_dist_round_draw
126 131
            denom_dist = denom_dist_round_draw
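
The hunk above switches the follow-up-round convolution to method='direct' and wraps the result in abs() to guard against tiny negative values from floating-point error. A self-contained sketch of that pattern with toy distributions; the array sizes are arbitrary, but the scipy calls are the ones used in the diff:

import numpy as np
from scipy.signal import convolve
from scipy.stats import binom

prior = binom.pmf(range(0, 6), 5, 0.5)          # distribution after earlier rounds (toy)
round_draw = binom.pmf(range(0, 11), 10, 0.5)   # 10 newly drawn ballots at p = 0.5

combined = convolve(prior, round_draw, method='direct')
combined = [abs(p) for p in combined]           # clamp any negative round-off values

print(len(combined))                   # 16 support points (0..15 winner ballots)
print(np.isclose(sum(combined), 1.0))  # still sums to 1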
@@ -211,7 +216,7 @@
211 216
        estimates = []
212 217
        for sub_audit in self.sub_audits.values():
213 218
            # Scale estimates by pairwise invalid proportion
214 -
            proportion = float(sub_audit.sub_contest.contest_ballots) / float(self.contest.contest_ballots)
219 +
            proportion = float(self.contest.contest_ballots) / float(sub_audit.sub_contest.contest_ballots) 
215 220
            estimate = self._next_sample_size_pairwise(sub_audit, sprob)
216 221
            scaled_estimate = (int(estimate[0] * proportion), estimate[1], estimate[2])
217 222
            estimates.append(scaled_estimate)
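
A worked example of the scaling applied above, on the reading that the ratio inflates a pairwise estimate into a full-contest draw size (the numbers are made up):

contest_ballots = 1000     # ballots in the full contest
pairwise_ballots = 800     # ballots relevant to this winner/loser pair
proportion = float(contest_ballots) / float(pairwise_ballots)   # 1.25

pairwise_estimate = 200    # estimated draw needed from the pair alone
print(int(pairwise_estimate * proportion))   # 250 ballots drawn overall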

Showing 9 files with coverage changes found.

New file src/r2b2/simulation/filip_athena.py
New file src/r2b2/tests/test_filip_athena_sim.py
New file src/r2b2/simulator.py
New file src/r2b2/simulation/athena.py
New file src/r2b2/simulation/minerva.py
Changes in src/r2b2/audit.py (-2, +2)
Changes in src/r2b2/contest.py (-1, +1)
Changes in src/r2b2/athena.py (-2, -2, +4)
Changes in src/r2b2/tests/util.py (-13, +13)

71 Commits

Hiding 70 contextual commits

Files Coverage
src/r2b2                   -21.90%   67.39%
tests                                100.00%
Project Totals (25 files)             68.30%