In [4]:
import random 

def reservoir_sampling(stream, k):
    """Draw up to ``k`` items from ``stream`` in a single pass.

    The first ``k`` items fill the reservoir in arrival order. For every
    later item, the n-th one seen (1-based), an integer is drawn uniformly
    from ``0..n-1``; when it is below ``k`` the item overwrites the
    reservoir slot at that index, otherwise the item is discarded.

    Args:
        stream: Any iterable; it is consumed once, front to back.
        k: Reservoir size.

    Returns:
        A list with the sampled items. It holds fewer than ``k`` entries
        when the stream itself is shorter than ``k``.
    """
    reservoir = []
    for seen, item in enumerate(stream, start=1):
        if seen > k:
            # randint is inclusive at both ends, so `slot` is one of `seen`
            # equally likely values and falls inside the reservoir with
            # probability k / seen.
            slot = random.randint(0, seen - 1)
            if slot < k:
                reservoir[slot] = item
            continue
        # Still filling the reservoir: keep the item unconditionally.
        reservoir.append(item)
    return reservoir


# Demo: keep a reservoir of 2 items while streaming the integers 0..29.
output = reservoir_sampling(stream=range(30), k=2)
print(output)
[18, 16]
In [5]:
def weighted_reservoir_sampling(stream):
    """Pick one item from ``stream`` in a single pass, biased by weight.

    Every item acts as its own weight: it is added to a running total and,
    from the second item onward, replaces the current pick with probability
    ``item / running_total``. The first item is always taken as the initial
    pick without drawing a random number.

    Args:
        stream: Iterable of numeric weights, consumed once. Weights are
            assumed to be non-negative; negative values are not validated.

    Returns:
        The item held as the pick once the stream is exhausted.

    Raises:
        ValueError: If ``stream`` yields no items.
    """
    total_weight = 0
    chosen = None
    seen_any = False
    for x in stream:
        total_weight += x
        if not seen_any:
            # First item seeds the pick; no random draw is consumed here.
            chosen = x
            seen_any = True
            continue
        if total_weight == 0:
            # Only zero weights so far: x / total_weight would divide by
            # zero, and there is nothing to prefer, so keep the current pick.
            continue
        # random.random() lies in [0.0, 1.0), so a ratio of 1 always replaces
        # and a ratio of 0 never does.
        if random.random() < x / total_weight:
            chosen = x
    if not seen_any:
        # Previously this fell through to `return s` with `s` unbound.
        raise ValueError("cannot sample from an empty stream")
    return chosen
In [6]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
# Repeat the weighted draw many times over a fixed set of weights so the
# empirical frequencies can be compared with the weights themselves.
item_weights = [100, 50, 5, 15]
num_trials = 100
sampled_data = [weighted_reservoir_sampling(item_weights) for trial in range(num_trials)]

# One unit of weight per observation, so each bar is a plain count.
weights = np.ones((len(sampled_data), 1))
plt.hist(sampled_data, weights=weights)
# Non-plotting alternative: np.histogram(sampled_data)
Out[6]:
(array([  2.,   5.,   0.,   0.,  25.,   0.,   0.,   0.,   0.,  68.]),
 array([   5. ,   14.5,   24. ,   33.5,   43. ,   52.5,   62. ,   71.5,
          81. ,   90.5,  100. ]),
 <a list of 10 Patch objects>)
In [ ]: