import numpy as np
import scipy as sp
from scipy import optimize
import math
import time
import sys
import scipy.stats
import json
from operator import itemgetter
# This file contains the code for running the experiments in the paper
#
# A. Kolobov, Y. Peres, C. Lu, E. Horvitz. "Staying up to Date with Online Content Changes Using Reinforcement Learning for Scheduling." NeurIPS-2019.
epsilon_learn_global = 0.0001
# Learns sources' Poisson change rates from observation histories.
#
# The curr_time parameter is supposed to be a time relative to the same reference point as the timestamps in histories. The suffix_length parameter is the
# length of the history suffix immediately preceding curr_time, telling this method to use only the history interval [curr_time - suffix_length, curr_time]
# for learning the change rates.
def LearnChRates(observations_incompl_obs, ch_rates_incompl_obs_est, observations_compl_obs, \
ch_rates_compl_obs_est, epsilon_learn, suffix_length, curr_time):
for w in range(len(ch_rates_incompl_obs_est)):
ch_rates_incompl_obs_est[w] = LearnChRates_IncomplObs(observations_incompl_obs[w], suffix_length, curr_time, epsilon_learn)
for w in range(len(ch_rates_compl_obs_est)):
ch_rates_compl_obs_est[w] = LearnChRates_ComplObs(observations_compl_obs[w], suffix_length, curr_time)
# Learns a source's Poisson change rate from the source's observation history assuming complete change observations.
#
# The training_data parameter is an array of timestamps when changes were detected, sorted in increasing order. The timestamps are assumed to be
# relative to some time point in the past. The suffix_length parameter is the length of the history suffix immediately preceding curr_time, telling this
# method to use only the history interval [curr_time - suffix_length, curr_time] for learning the change rates. The method uses Equation 11 from the
# NeurIPS-2019 paper for learning the change rate.
def LearnChRates_ComplObs(training_data, suffix_length, curr_time):
# We use a type of smoothing whereby we assume we start with some number (not necessarily an integer) of imaginary change observations over an
    # imaginary period of time. The values we used for the experiments in the NeurIPS-2019 paper are below.
smoothing_count_term = 0.5
smoothing_interval_term = 0.5
total_changes = 0
time_so_far = 0
i = len(training_data) - 1
while (i >= 0 and training_data[i] > curr_time):
i -= 1
if (i < 0):
# the observation history before curr_time is empty
return smoothing_count_term / (smoothing_interval_term + min(curr_time, suffix_length))
elif (curr_time - training_data[i] > suffix_length):
# there were no observations in the time interval [curr_time - suffix_length, curr_time]
return smoothing_count_term / (smoothing_interval_term + suffix_length)
else:
time_so_far = curr_time - training_data[i]
while (time_so_far < suffix_length and i >= 0):
# We assume we are fresh w.r.t. every source when we start crawling
previous_interval = (training_data[i] - training_data[i - 1]) if (i > 0) else training_data[i]
total_changes += 1
time_so_far = min(time_so_far + previous_interval, suffix_length)
i -= 1
return ((total_changes + smoothing_count_term) / (time_so_far + smoothing_interval_term))
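# Illustrative sketch (not part of the original experiments): a tiny sanity check of LearnChRates_ComplObs on hand-made data.
# The timestamps below are hypothetical -- three observed changes at times 2.0, 5.5, and 9.0, evaluated at curr_time = 10.0 with
# a 10.0-long history suffix.
def _ExampleLearnChRatesComplObs():
    est = LearnChRates_ComplObs([2.0, 5.5, 9.0], suffix_length=10.0, curr_time=10.0)
    # With the 0.5/0.5 smoothing terms above, this should come out to (3 + 0.5) / (10.0 + 0.5), i.e., roughly 1/3 change per time unit.
    print("Estimated change rate (complete observations):", est)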
# Learns a source's Poisson change rate from the source's observation history assuming incomplete change observations.
#
# The training_data parameter is a 2D array of history data for a given source, where training_data[i][0] is the timestamp of when the i-th crawl happened,
# assumed to be relative to some time point in the past, and training_data[i][1] is a 0/1 flag indicating whether that crawl detected any changes. The
# suffix_length parameter is the length of the history suffix immediately preceding curr_time, telling this method to use only the history interval
# [curr_time - suffix_length, curr_time] for learning the change rates. The method uses Equation 10 from the NeurIPS-2019 paper for learning the change rate.
def LearnChRates_IncomplObs(training_data, suffix_length, curr_time, epsilon):
# We use a type of smoothing whereby we assume we start with some number of inter-crawl intervals (not necessarily an integer) of various lengths
# that detected a change and some number that didn't. We used one interval of each kind, whose length in both cases is the same, shown below.
smoothing_term = 0.5
time_so_far = 0
# Initialize the sets of inter-observation intervals assuming we had one imaginary interval after which we observed
# a change and one imaginary interval after which we didn't, with the length of either interval equal to
# the smoothing term.
list_ch_training_intervals = [smoothing_term]
list_no_ch_training_intervals = [smoothing_term]
i = len(training_data) - 1
while (i >= 0 and training_data[i][0] > curr_time):
i -= 1
if (i < 0):
# The observation history before curr_time is empty. Return the change rate estimate
# assuming we had one imaginary interval after which we observed a change and
# one imaginary interval after which we didn't, with the length of either interval equal to
# the smoothing term.
return math.log(2) / smoothing_term
# Count back the history suffix starting with the time of the latest observation (crawl) that happened before curr_time. We can't use the time
# interval between the latest crawl and the current time for training purposes, because we haven't collected any training data (observations)
# in that interval, and hence, due to the incompleteness of our observation history, don't know whether any changes have happened since the
# latest observation
while (time_so_far < suffix_length and i >= 0):
previous_interval = (training_data[i][0] - training_data[i - 1][0]) if (i > 0) else training_data[i][0]
        # Note that after the next line, it may be that time_so_far + previous_interval > suffix_length.
        # This is fine -- to make learning less biased, we need to either use the last observation that falls into
        # [curr_time - suffix_length, curr_time] *and* the *entire* inter-observation interval
        # preceding it, or use neither that observation nor that interval. We choose to do the former.
if training_data[i][1] == 1:
list_ch_training_intervals.append(previous_interval)
else:
list_no_ch_training_intervals.append(previous_interval)
time_so_far += previous_interval
i -= 1
max_ch_interval = max(list_ch_training_intervals)
min_ch_interval = min(list_ch_training_intervals)
sum_ch_intervals = sum(list_ch_training_intervals)
sum_no_ch_intervals = sum(list_no_ch_training_intervals)
# Based on the MLE for incomplete observation history, it's easy to verify that the bounds always bracket
# the change rate estimate. However, they affect the exponent of e during learning, and when they are
# too loose, cause numerical errors. Therefore, we bound the range of change rate's values based on reasonable
# guesses based on prior knowledge.
ch_rate_lb = 0.0005 # math.log(sum_ch_intervals / sum_no_ch_intervals + 1) / max_ch_interval;
ch_rate_ub = 7 # math.log(sum_ch_intervals / sum_no_ch_intervals + 1) / min_ch_interval;
return BisectionSearchMonotoneDecr(DataLLDerivative, (list_ch_training_intervals, list_no_ch_training_intervals), ch_rate_lb, ch_rate_ub, epsilon)
# Computes the derivative, with respect to the change rate, of the data log likelihood of a history of incomplete change observations under a given
# Poisson change rate. LearnChRates_IncomplObs estimates the change rate by finding this derivative's root.
def DataLLDerivative(params, ch_rate):
term_with_changes = 0
term_without_changes = 0
list_ch_training_intervals = params[0]
list_no_ch_training_intervals = params[1]
for interval in list_ch_training_intervals:
term_with_changes += (interval / (math.exp(ch_rate * interval) - 1))
for interval in list_no_ch_training_intervals:
term_without_changes += interval
return (term_with_changes - term_without_changes)
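# Illustrative sketch (not part of the original experiments): with equal-length intervals, the root of DataLLDerivative has a closed form.
# For k unit-length intervals after which a change was observed and m unit-length intervals after which none was, the root is ln(1 + k/m).
# The intervals below are made up: k = 3, m = 1, so the derivative should be approximately zero at ln(4).
def _ExampleDataLLDerivative():
    params = ([1.0, 1.0, 1.0], [1.0])
    print("Derivative at ln(4):", DataLLDerivative(params, math.log(4.0)))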
# Evaluates the Lagrangian of the negative harmonic policy cost for a given lambda.
#
# See Proposition 2 and equation system 5 in the NeurIPS-2019 paper for details.
def PolicyValueLagr_NoObs(params, lambd):
sum = 0
importances = params[0]
ch_rates = params[1]
bandwidth = params[2]
    # We are assuming importances and ch_rates have the same length
for i in range(len(importances)):
sum += (-ch_rates[i] + math.sqrt(ch_rates[i]**2 + 4 * importances[i] * ch_rates[i] / lambd)) / 2
return sum - bandwidth
# Bisection search for the root of a 1-variable function parameterized by known parameter values (param) and assumed to be monotonically decreasing
# on the interval [var_lb_init, var_ub_init].
#
# The assumption that the function is monotonically decreasing lets us save one function evaluation.
def BisectionSearchMonotoneDecr(func, params, var_lb_init, var_ub_init, epsilon):
var_lb = var_lb_init
var_ub = var_ub_init
var_center = (var_ub + var_lb) / 2
num_it = 0
while ((var_ub - var_lb) / 2 > epsilon):
num_it += 1
func_val = func(params, var_center)
if (func_val == 0):
return var_center
elif (func_val > 0):
var_lb = var_center
else:
var_ub = var_center
var_center = (var_ub + var_lb) / 2
return var_center
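# Illustrative sketch (not part of the original experiments): BisectionSearchMonotoneDecr applied to a simple monotonically decreasing
# function f(params, x) = params[0] - x, whose root is params[0]. The numbers are arbitrary.
def _ExampleBisectionSearch():
    decreasing = lambda params, x: params[0] - x
    root = BisectionSearchMonotoneDecr(decreasing, (2.0,), 0.0, 10.0, 0.000001)
    # root should be within the requested tolerance of 2.0
    print("Approximate root:", root)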
# LambdaCrawl for sources with incomplete change observations.
#
# See the pseudocode in Algorithm 1 in the NeurIPS-2019 paper for details.
def LambdaCrawl_IncomplObs(importances, ch_rates, bandwidth, epsilon):
if (bandwidth == 0 or importances.shape[0] == 0):
return np.zeros_like(importances, dtype=float)
min_importance = min(importances)
max_importance = max(importances)
min_ch_rate = min(ch_rates)
max_ch_rate = max(ch_rates)
lambda_lb = len(importances)**2 * min_importance * min_ch_rate / (len(importances) * max_ch_rate * bandwidth + bandwidth**2)
lambda_ub = len(importances)**2 * max_importance * max_ch_rate / (len(importances) * min_ch_rate * bandwidth + bandwidth**2)
lambd = BisectionSearchMonotoneDecr(PolicyValueLagr_NoObs, (importances, ch_rates, bandwidth), lambda_lb, lambda_ub, epsilon)
crawl_rates = np.zeros_like(importances, dtype=float)
for i in range(len(crawl_rates)):
crawl_rates[i] = (-ch_rates[i] + math.sqrt(ch_rates[i]**2 + 4 * importances[i] * ch_rates[i] / lambd)) / 2
return crawl_rates
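# Illustrative sketch (not part of the original experiments): LambdaCrawl_IncomplObs on a hand-made three-source problem. The importances,
# change rates, and bandwidth below are hypothetical; the returned crawl rates should be positive and sum to approximately the bandwidth.
def _ExampleLambdaCrawlIncomplObs():
    importances = np.array([1.0, 2.0, 4.0])
    ch_rates = np.array([0.5, 1.0, 2.0])
    crawl_rates = LambdaCrawl_IncomplObs(importances, ch_rates, 3.0, 0.000001)
    print("Crawl rates:", crawl_rates, "sum:", sum(crawl_rates))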
# LambdaCrawl approximation for sources with incomplete change observations that assumes importance_w/change_rate_w = c for a fixed (unknown) c
# for all sources w.
#
# See Proposition 9 in the NeurIPS-2019 paper for details.
def LambdaCrawlApprox_IncomplObs(importances, ch_rates, bandwidth, epsilon):
crawl_rates = np.zeros_like(importances, dtype=float)
sum_imp = sum(importances)
for w in range(len(importances)):
crawl_rates[w] = importances[w] * bandwidth / sum_imp
return crawl_rates
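# Illustrative sketch (not part of the original experiments): the approximation simply splits the bandwidth in proportion to importance,
# so for the hypothetical inputs below the crawl rates should be [0.5, 1.5] regardless of the change rates.
def _ExampleLambdaCrawlApproxIncomplObs():
    crawl_rates = LambdaCrawlApprox_IncomplObs(np.array([1.0, 3.0]), np.array([2.0, 0.5]), 2.0, 0.000001)
    print("Importance-proportional crawl rates:", crawl_rates)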
# LambdaCrawl for sources with complete change observations.
#
# See the pseudocode in Algorithm 2 in the NeurIPS-2019 paper for details.
def LambdaCrawl_ComplObs(importances, ch_rates, bandwidth):
if (bandwidth == 0):
return np.zeros_like(importances, dtype=float)
crawl_probs = np.zeros_like(importances, dtype=float)
crawl_probs_approx = np.empty_like(importances, dtype=float)
remaining_bandwidth = bandwidth
num_it = 0
while True:
num_it += 1
saturated_a_constraint = False
# For the calculation of the denominator later in this loop, we will need the sum of importances, *but only of those sources
# for which we haven't determined crawl_probs[w] = 1 in previous iterations*. Sources for which we have determined this have
# been excluded from consideration entirely.
sum_importances = 0
for w in range(len(importances)):
if crawl_probs[w] == 0:
sum_importances += importances[w]
for w in range(len(importances)):
            # under any acceptable solution, the crawl probability of every source is > 0, so crawl_probs[w] = 0
            # means that we haven't determined the final value for this source yet
if crawl_probs[w] == 0:
p_hat_w = bandwidth * importances[w] / (ch_rates[w] * sum_importances)
# if the probability constraint p_w <= 1 is saturated for source w...
if (p_hat_w >= 1.0):
crawl_probs[w] = 1.0
# indicate that we don't need to reconsider the crawl probability value of source w in subsequent iterations
crawl_probs_approx[w] = -1
remaining_bandwidth -= (crawl_probs[w] * ch_rates[w])
saturated_a_constraint = True
else:
crawl_probs_approx[w] = p_hat_w
# if we didn't saturate any constraints in the last iteration, the non-negative values in
# crawl_probs_approx are the final crawl probability values of the corresponding sources
if saturated_a_constraint == False:
for w in range(len(importances)):
if (crawl_probs_approx[w] != -1):
crawl_probs[w] = crawl_probs_approx[w]
break
# otherwise, repeat with remaining bandwidth and sources that don't have crawl_probs_approx[w] = -1 yet
else:
bandwidth = remaining_bandwidth
return crawl_probs
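# Illustrative sketch (not part of the original experiments): LambdaCrawl_ComplObs on a hand-made problem where one source's probability
# constraint saturates. With the hypothetical inputs below, the third source's unconstrained probability would exceed 1, so it is clamped
# to 1 in the first iteration and the remaining bandwidth is redistributed, giving crawl probabilities of roughly [0.5, 0.5, 1.0].
def _ExampleLambdaCrawlComplObs():
    crawl_probs = LambdaCrawl_ComplObs(np.array([1.0, 1.0, 8.0]), np.array([1.0, 1.0, 1.0]), 2.0)
    print("Crawl probabilities:", crawl_probs)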
# Computes the harmonic policy cost for URLs with incomplete observations.
#
# See Proposition 1/Equation 4 in the NeurIPS-2019 paper.
def HarmonicPolicyCost_IncomplObs(importances, ch_rates, crawl_rates):
if (len(importances) == 0):
return 0
cost_incompl_obs = 0
for w in range(len(importances)):
if crawl_rates[w] == 0:
cost_incompl_obs = math.inf
break
cost_incompl_obs -= importances[w] * math.log(crawl_rates[w] / (crawl_rates[w] + ch_rates[w]))
return cost_incompl_obs / len(importances)
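# Illustrative sketch (not part of the original experiments): for a single source with importance 1 whose crawl rate equals its change rate,
# the harmonic policy cost reduces to -log(1/2) = log(2), i.e., roughly 0.693. The inputs are hypothetical.
def _ExampleHarmonicPolicyCostIncomplObs():
    print("Harmonic cost:", HarmonicPolicyCost_IncomplObs([1.0], [1.0], [1.0]))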
# Computes the harmonic policy cost for URLs with complete observations.
#
# See the formula in Proposition 4 in the NeurIPS-2019 paper.
def HarmonicPolicyCost_ComplObs(importances, ch_rates, crawl_probs):
if (len(importances) == 0):
return 0
cost_compl_obs = 0
for w in range(len(importances)):
if crawl_probs[w] == 0:
cost_compl_obs = math.inf
break
cost_compl_obs -= importances[w] * math.log(crawl_probs[w])
return cost_compl_obs / len(importances)
# Computes the harmonic policy cost for a mix of URLs with complete and incomplete observations.
#
# See the Equation 8 in the NeurIPS-2019 paper.
def HarmonicPolicyCost(crawl_rates_incompl_obs, crawl_probs_compl_obs, importances_incompl_obs, ch_rates_incompl_obs, \
importances_compl_obs, ch_rates_compl_obs):
cost_incompl_obs = HarmonicPolicyCost_IncomplObs(importances_incompl_obs, ch_rates_incompl_obs, crawl_rates_incompl_obs)
cost_compl_obs = HarmonicPolicyCost_ComplObs(importances_compl_obs, ch_rates_compl_obs, crawl_probs_compl_obs)
return ((len(importances_incompl_obs) * cost_incompl_obs + len(importances_compl_obs) * cost_compl_obs) \
/ (len(importances_incompl_obs) + len(importances_compl_obs)))
# Computes the binary policy cost for URLs with incomplete observations.
#
# See Equation 12 in the NeurIPS-2019 paper's supplement.
def BinaryPolicyCost_IncomplObs(importances, ch_rates, crawl_rates):
if (len(importances) == 0):
return 0
cost_incompl_obs = 0
for w in range(len(importances)):
cost_incompl_obs += importances[w] * ch_rates[w] / (crawl_rates[w] + ch_rates[w])
return cost_incompl_obs / len(importances)
# Computes the binary policy cost for URLs with complete observations.
#
# See Equation 13 in the NeurIPS-2019 paper's supplement.
def BinaryPolicyCost_ComplObs(importances, ch_rates, crawl_probs):
if (len(importances) == 0):
return 0
cost_compl_obs = 0
for w in range(len(importances)):
cost_compl_obs += importances[w] * (1 - crawl_probs[w])
return cost_compl_obs / len(importances)
# Computes the binary policy cost for a mix of URLs with complete and incomplete observations.
def BinaryPolicyCost(crawl_rates_incompl_obs, crawl_probs_compl_obs, importances_incompl_obs, ch_rates_incompl_obs, \
importances_compl_obs, ch_rates_compl_obs):
cost_incompl_obs = BinaryPolicyCost_IncomplObs(importances_incompl_obs, ch_rates_incompl_obs, crawl_rates_incompl_obs)
cost_compl_obs = BinaryPolicyCost_ComplObs(importances_compl_obs, ch_rates_compl_obs, crawl_probs_compl_obs)
return ((len(importances_incompl_obs) * cost_incompl_obs + len(importances_compl_obs) * cost_compl_obs) \
/ (len(importances_incompl_obs) + len(importances_compl_obs)))
# Computes the harmonic policy cost for a given bandwidth split across complete- and incomplete-change-history URLs.
#
# See Algorithm 3 in the NeurIPS-2019 paper. The return of the function in the pseudocode is the negative of SplitEval_JStar.
def SplitEval_JStar(bandwidth_compl_obs, solver_x_incompl_obs, importances_incompl_obs, ch_rates_incompl_obs, epsilon_incompl_obs, importances_compl_obs, ch_rates_compl_obs, bandwidth):
if (bandwidth_compl_obs > bandwidth):
raise ValueError('SplitEval_JStar ERROR: bandwidth allocation to sources with complete observations exceeds total bandwidth! Bandwidth allocation to sources with complete observations: ', bandwidth_compl_obs, ", total bandwidth: ", bandwidth)
crawl_rates_incompl_obs = solver_x_incompl_obs(importances_incompl_obs, ch_rates_incompl_obs, bandwidth - bandwidth_compl_obs, epsilon_incompl_obs)
J_incompl_obs = 0
for w in range(len(importances_incompl_obs)):
if crawl_rates_incompl_obs[w] == 0:
J_incompl_obs = math.inf
break
else:
J_incompl_obs -= (importances_incompl_obs[w] * math.log(crawl_rates_incompl_obs[w] / (crawl_rates_incompl_obs[w] + ch_rates_incompl_obs[w])))
crawl_probs_compl_obs = LambdaCrawl_ComplObs(importances_compl_obs, ch_rates_compl_obs, bandwidth_compl_obs)
J_compl_obs = 0
for w in range(len(importances_compl_obs)):
if crawl_probs_compl_obs[w] == 0:
J_compl_obs = math.inf
break
else:
J_compl_obs -= (importances_compl_obs[w] * math.log(crawl_probs_compl_obs[w]))
return J_incompl_obs + J_compl_obs
# Implements the LambdaCrawl family of algorithms.
#
# See Algorithm 3 in the NeurIPS-2019 paper. The implementation can use either the optimal LambdaCrawl_IncomplObs or the approximate
# LambdaCrawlApprox_IncomplObs for handling the incomplete-change-observation sources.
def LambdaCrawl_X(solver_x_incompl_obs, importances_incompl_obs, ch_rates_incompl_obs, epsilon_incompl_obs, importances_compl_obs, ch_rates_compl_obs, bandwidth):
# We use a minimization routine here, so SplitEval_JStar returns the value of J* for a given split, _not_ of \overline{J}^* = -J^*
# as in LambdaCrawl's description in the paper.
result = sp.optimize.minimize_scalar(SplitEval_JStar, bounds=(0, min(bandwidth, sum(ch_rates_compl_obs))), \
args=(solver_x_incompl_obs, importances_incompl_obs, ch_rates_incompl_obs, epsilon_incompl_obs, importances_compl_obs, ch_rates_compl_obs, bandwidth), \
method='bounded', options={'xatol': 0.005 * bandwidth})
if result.success:
bandwidth_compl_obs = result.x
crawl_rates_incompl_obs = solver_x_incompl_obs(importances_incompl_obs, ch_rates_incompl_obs, bandwidth - bandwidth_compl_obs, epsilon_incompl_obs)
crawl_probs_compl_obs = LambdaCrawl_ComplObs(importances_compl_obs, ch_rates_compl_obs, bandwidth_compl_obs)
return (crawl_rates_incompl_obs, crawl_probs_compl_obs)
else:
raise ValueError('LambdaCrawl ERROR: bounded minimization failed')
# Implements LambdaCrawl proper. See LambdaCrawl_X for details.
def LambdaCrawl(importances_incompl_obs, ch_rates_incompl_obs, epsilon_incompl_obs, importances_compl_obs, ch_rates_compl_obs, bandwidth):
return LambdaCrawl_X(LambdaCrawl_IncomplObs, importances_incompl_obs, ch_rates_incompl_obs, epsilon_incompl_obs, importances_compl_obs, ch_rates_compl_obs, bandwidth)
# Implements LambdaCrawlApprox, i.e., LambdaCrawl that uses the approximation from Proposition 9 in the NeurIPS-2019 paper to handle the
# incomplete-change-observation sources. See LambdaCrawl_X for details.
def LambdaCrawlApprox(importances_incompl_obs, ch_rates_incompl_obs, epsilon_incompl_obs, importances_compl_obs, ch_rates_compl_obs, bandwidth):
return LambdaCrawl_X(LambdaCrawlApprox_IncomplObs, importances_incompl_obs, ch_rates_incompl_obs, epsilon_incompl_obs, importances_compl_obs, ch_rates_compl_obs, bandwidth)
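# Illustrative sketch (not part of the original experiments): running full LambdaCrawl on a tiny mixed problem with hypothetical importances,
# change rates, and bandwidth. The bandwidth split between the two source types is found by the bounded scalar minimization inside
# LambdaCrawl_X, so the total crawl intensity (incomplete-observation crawl rates plus complete-observation crawl probabilities times
# change rates) should come out to approximately the bandwidth.
def _ExampleLambdaCrawl():
    ch_rates_compl = np.array([1.0])
    rates_incompl, probs_compl = LambdaCrawl(np.array([1.0, 2.0]), np.array([1.0, 0.5]), 0.000001, np.array([3.0]), ch_rates_compl, 1.5)
    print("Total crawl intensity:", sum(rates_incompl) + sum(probs_compl * ch_rates_compl))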
# Implements the BinaryLambdaCrawl family of algorithms.
#
# This is a generalization of the algorithm from Y. Azar, E. Horvitz, E. Lubetzky, Y. Peres, D. Shahaf. "Tractable near-optimal policies for crawling."
# PNAS-2018, which the NeurIPS-2019 paper refers to as BinaryLambdaCrawl. That algorithm optimizes the binary policy cost (see that paper for
# details) and as a result may fail to allocate any crawl rate to some of the sources. BinaryLambdaCrawl(epsilon) (see Subsection 9.3 in the
# supplement of the NeurIPS-2019 paper) is a modification of BinaryLambdaCrawl that forces it to allocate some bandwidth even to pages that
# BinaryLambdaCrawl would otherwise crawl-starve. In this family, BinaryLambdaCrawl(0.0) corresponds to the original BinaryLambdaCrawl.
# BinaryLambdaCrawl(0.4) has the best performance on the NeurIPS-2019 paper's dataset w.r.t. the harmonic policy cost, of all
# BinaryLambdaCrawl(epsilon) with epsilon in {0.0, 0.1,...,1}.
#
# The wrapper handles sources with complete and incomplete change observations. BinaryLambdaCrawl doesn't know how to handle the former in any special
# way; it simply treats them as if their observation history was incomplete. See LambdaCrawlBinary_Epsilon_Helper for most of this algorithm's logic.
def LambdaCrawlBinary_Epsilon(importances_incompl_obs, ch_rates_incompl_obs, epsilon, importances_compl_obs, ch_rates_compl_obs, bandwidth):
if (len(importances_compl_obs) > 0):
sys.exit("ERROR: LambdaCrawlBinary_Epsilon doesn't know how to handle complete observation histories, but importances_compl_obs is nonempty")
imps_and_chrates = np.column_stack((importances_incompl_obs, ch_rates_incompl_obs))
crawl_rates = np.zeros_like(imps_and_chrates[:,0], dtype=float)
min_crawl_rate = bandwidth / imps_and_chrates.shape[0] * epsilon
LambdaCrawlBinary_Epsilon_Helper(imps_and_chrates, crawl_rates, bandwidth, min_crawl_rate)
return crawl_rates, []
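# Illustrative sketch (not part of the original experiments): BinaryLambdaCrawl(0.4) on a hand-made two-source problem. The inputs are
# hypothetical. With bandwidth 1.0 and two sources, the minimum crawl rate is 1.0 / 2 * 0.4 = 0.2; the low-value source should be pinned
# to that minimum and the rest of the bandwidth should go to the other source, so the returned rates should be roughly [0.2, 0.8].
def _ExampleLambdaCrawlBinaryEpsilon():
    crawl_rates, _ = LambdaCrawlBinary_Epsilon(np.array([1.0, 4.0]), np.array([1.0, 2.0]), 0.4, [], [], 1.0)
    print("BinaryLambdaCrawl(0.4) crawl rates:", crawl_rates)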
# The main part of BinaryLambdaCrawl implementation.
#
# See Section 9.3 of the NeurIPS-2019 paper's supplement and Y. Azar, E. Horvitz, E. Lubetzky, Y. Peres, D. Shahaf. "Tractable near-optimal policies
# for crawling." PNAS-2018 for details.
def LambdaCrawlBinary_Epsilon_Helper(imps_and_chrates, crawl_rates, bandwidth, min_crawl_rate):
idxs_and_value_ratios = []
for w in range(imps_and_chrates.shape[0]):
idxs_and_value_ratios.append((imps_and_chrates[w, 0] * imps_and_chrates[w, 1] / (imps_and_chrates[w, 1] + min_crawl_rate)**2, w))
r = 0
for w in range(len(idxs_and_value_ratios)):
r += math.sqrt(imps_and_chrates[w, 0] * imps_and_chrates[w, 1])
s = sum(imps_and_chrates[:, 1])
idxs_and_value_ratios.sort(key=itemgetter(0))
rem_bandwidth = bandwidth
for w in range(len(idxs_and_value_ratios)):
if (imps_and_chrates[idxs_and_value_ratios[w][1], 0] * imps_and_chrates[idxs_and_value_ratios[w][1], 1] \
/ (imps_and_chrates[idxs_and_value_ratios[w][1], 1] + min_crawl_rate)**2 <= (r / (rem_bandwidth + s ))**2):
r -= math.sqrt(imps_and_chrates[idxs_and_value_ratios[w][1], 0] \
* imps_and_chrates[idxs_and_value_ratios[w][1], 1])
s -= imps_and_chrates[idxs_and_value_ratios[w][1], 1]
crawl_rates[idxs_and_value_ratios[w][1]] = min_crawl_rate
rem_bandwidth -= crawl_rates[idxs_and_value_ratios[w][1]]
else:
# NOTE: this clause kicks in at every iteration after some iteration M. It doesn't alternate with the clause above.
crawl_rates[idxs_and_value_ratios[w][1]] = \
math.sqrt(imps_and_chrates[idxs_and_value_ratios[w][1], 0] \
* imps_and_chrates[idxs_and_value_ratios[w][1], 1]) \
* (rem_bandwidth + s) / r - imps_and_chrates[idxs_and_value_ratios[w][1], 1]
# Implements the RL version of the LambdaCrawl family of algorithms.
#
# See Algorithm 4 in the NeurIPS-2019 paper. The implementation can use either LambdaCrawl itself, or LambdaCrawlApprox, its approximate version,
# or LambdaCrawlBinary that minimizes binary policy cost as solver_x, the algorithm for handling incomplete-change-observation sources.
def LambdaLearnAndCrawl_X(solver_x, importances_incompl_obs, ch_rates_incompl_obs_actual, epsilon_incompl_obs, \
importances_compl_obs, ch_rates_compl_obs_actual, bandwidth, epsilon_learn, epoch_length, suffix_len, horizon):
start_total = time.time()
curr_time = 0
    changes_incompl_obs = np.empty(len(importances_incompl_obs), dtype=object)
for i in range(len(changes_incompl_obs)): changes_incompl_obs[i] = []
    crawls_incompl_obs = np.empty(len(importances_incompl_obs), dtype=object)
for i in range(len(crawls_incompl_obs)): crawls_incompl_obs[i] = []
    observations_incompl_obs = np.empty(len(importances_incompl_obs), dtype=object)
for i in range(len(observations_incompl_obs)): observations_incompl_obs[i] = []
    changes_compl_obs = np.empty(len(importances_compl_obs), dtype=object)
for i in range(len(changes_compl_obs)): changes_compl_obs[i] = []
    crawls_compl_obs = np.empty(len(importances_compl_obs), dtype=object)
for i in range(len(crawls_compl_obs)): crawls_compl_obs[i] = []
observations_compl_obs = changes_compl_obs # since the observation sequence is complete, we observe every change
ch_rates_incompl_obs_est = [1.0] * len(ch_rates_incompl_obs_actual)
ch_rates_compl_obs_est = [1.0] * len(ch_rates_compl_obs_actual)
idealized_policy_costs_harmonic = []
idealized_policy_costs_binary = []
while curr_time < horizon:
start_sch = time.time()
# Compute the optimal policy parameters given the current change rate estimates.
crawl_rates_incompl_obs, crawl_probs_compl_obs = solver_x(importances_incompl_obs, ch_rates_incompl_obs_est, \
epsilon_incompl_obs, importances_compl_obs, ch_rates_compl_obs_est, bandwidth)
# Apply Propositions 1 & 4 from the NeurIPS-2019 paper to find the (asymptotic) harmonic policy cost.
idealized_policy_cost_harmonic = HarmonicPolicyCost(crawl_rates_incompl_obs, crawl_probs_compl_obs, importances_incompl_obs,\
ch_rates_incompl_obs_actual, importances_compl_obs, ch_rates_compl_obs_actual)
# Apply Equations 12 & 13 from the NeurIPS-2019 paper's supplement to find the (asymptotic) binary policy cost.
idealized_policy_cost_binary = BinaryPolicyCost(crawl_rates_incompl_obs, crawl_probs_compl_obs, importances_incompl_obs,\
ch_rates_incompl_obs_actual, importances_compl_obs, ch_rates_compl_obs_actual)
print("Idealized harmonic policy cost: ", idealized_policy_cost_harmonic)
idealized_policy_costs_harmonic.append(idealized_policy_cost_harmonic)
idealized_policy_costs_binary.append(idealized_policy_cost_binary)
end_sch = time.time()
start_ext = time.time()
        # Advance the simulation by one epoch: simulate source changes for sources of both types until the end of the epoch, then, given these
        # changes, simulate crawls from the current policy using the policy parameters computed above, and finally generate observations until
        # the end of the epoch, given the generated changes and crawls. Note that for extending the observation history of complete-observation
        # sources, the scheduling policy doesn't matter.
ExtendChangeHistory(changes_incompl_obs, ch_rates_incompl_obs_actual, curr_time, epoch_length)
ExtendChangeHistory(changes_compl_obs, ch_rates_compl_obs_actual, curr_time, epoch_length)
ExtendCrawlHistory_IncomplObs(crawls_incompl_obs, crawl_rates_incompl_obs, curr_time, epoch_length)
ExtendCrawlHistory_ComplObs(changes_compl_obs, crawls_compl_obs, crawl_probs_compl_obs, curr_time, epoch_length)
ExtendObsHistory_IncomplObs(changes_incompl_obs, crawls_incompl_obs, observations_incompl_obs, curr_time, epoch_length)
end_ext = time.time()
curr_time = min(curr_time + epoch_length, horizon)
start_learn = time.time()
# Re-learn the change rates from the extended observation data. There is actually no need to learn for the incomplete-observation sources
# in case we use LambdaLearnAndCrawlApprox, but we learn them anyway.
LearnChRates(observations_incompl_obs, ch_rates_incompl_obs_est, observations_compl_obs, ch_rates_compl_obs_est, \
epsilon_learn, math.inf, curr_time)
end_learn = time.time()
#print('\tLearning took {} seconds'.format(end_learn - start_learn))
end_total = time.time()
total_time = end_total - start_total
print('RL took {} seconds in total'.format(total_time))
policy_cost = EvalMixedTrace(importances_incompl_obs, changes_incompl_obs, crawls_incompl_obs, importances_compl_obs, changes_compl_obs, \
crawls_compl_obs, horizon)
return policy_cost, total_time, idealized_policy_costs_harmonic, idealized_policy_costs_binary
# Implements LambdaLearnAndCrawl. See LambdaLearnAndCrawl_X for details.
def LambdaLearnAndCrawl(importances_incompl_obs, ch_rates_incompl_obs_actual , epsilon_incompl_obs, \
importances_compl_obs, ch_rates_compl_obs_actual, bandwidth, epsilon_learn, epoch_length, suffix_len, horizon):
print("Running LambdaLearnAndCrawl...")
return LambdaLearnAndCrawl_X(LambdaCrawl, importances_incompl_obs, ch_rates_incompl_obs_actual , epsilon_incompl_obs, \
importances_compl_obs, ch_rates_compl_obs_actual, bandwidth, epsilon_learn, epoch_length, suffix_len, horizon)
# Implements LambdaLearnAndCrawlApprox. See LambdaLearnAndCrawl_X for details.
def LambdaLearnAndCrawlApprox(importances_incompl_obs, ch_rates_incompl_obs_actual , epsilon_incompl_obs, \
importances_compl_obs, ch_rates_compl_obs_actual, bandwidth, epsilon_learn, epoch_length, suffix_len, horizon):
print("Running LambdaLearnAndCrawlApprox...")
return LambdaLearnAndCrawl_X(LambdaCrawlApprox, importances_incompl_obs, ch_rates_incompl_obs_actual , epsilon_incompl_obs, \
importances_compl_obs, ch_rates_compl_obs_actual, bandwidth, epsilon_learn, epoch_length, suffix_len, horizon)
# Implements LambdaLearnAndCrawlBinary. See LambdaLearnAndCrawl_X for details.
def LambdaLearnAndCrawlBinary(importances_incompl_obs, ch_rates_incompl_obs_actual , epsilon_incompl_obs, \
importances_compl_obs, ch_rates_compl_obs_actual, bandwidth, epsilon_learn, epoch_length, suffix_len, horizon):
print("Running LambdaLearnAndCrawlBinary...")
    # LambdaCrawlBinary doesn't know how to deal with pages with complete change observations, so put all pages into the "incomplete change
    # observations" bucket.
importances_incompl_obs_all = np.concatenate((importances_incompl_obs, importances_compl_obs))
ch_rates_incompl_obs_actual_all = np.concatenate((ch_rates_incompl_obs_actual, ch_rates_compl_obs_actual))
return LambdaLearnAndCrawl_X(LambdaCrawlBinary_Epsilon, importances_incompl_obs_all, ch_rates_incompl_obs_actual_all, epsilon_incompl_obs, \
[], [], bandwidth, epsilon_learn, epoch_length, suffix_len, horizon)
# Computes the n-th Harmonic number.
def Harmonic(n):
# Not very efficient for large n, but we don't expect n to be large under LambdaCrawl
return sum(1/k for k in range(1, n + 1))
# Computes the time-averaged harmonic penalty for a LambdaCrawl policy given a history of changes for a mixed set of sources with complete and
# incomplete change observations.
#
# Note that we need the history of changes themselves, not of observations, to do this evaluation. See EvalTrace for more details.
def EvalMixedTrace(importances_incompl_obs, changes_incompl_obs, crawls_incompl_obs, importances_compl_obs, changes_compl_obs, \
crawls_compl_obs, horizon):
return (len(importances_incompl_obs) * EvalTrace(importances_incompl_obs, changes_incompl_obs, crawls_incompl_obs, horizon) + \
len(importances_compl_obs) * EvalTrace(importances_compl_obs, changes_compl_obs, crawls_compl_obs, horizon)) / (len(importances_incompl_obs) + len(importances_compl_obs))
# Computes the time-averaged harmonic penalty for a LambdaCrawl policy.
def EvalTrace(importances, ch_hists, crawl_hists, horizon):
if (len(importances) == 0):
return 0
    # This function returns the harmonic *cost* of a trace, i.e., the lower the value it returns, the better.
J_pi = 0
for w in range(len(importances)):
# If the source never changed during the observation period, its contribution to the penalty is 0
if (not ch_hists[w]):
continue
# Otherwise, if there were changes but no crawls, count the number of changes before the horizon
if (not crawl_hists[w]):
num_changes = 0
for t in range(len(ch_hists[w])):
if (ch_hists[w][t] <= horizon):
num_changes += 1
else:
break
J_pi += (importances[w] * Harmonic(num_changes) / horizon)
continue
curr_num_changes = 0
running_penalty = 0
curr_change_time_idx = 0
curr_crawl_time_idx = 0
while (curr_change_time_idx < len(ch_hists[w]) and ch_hists[w][curr_change_time_idx] <= horizon):
# To continue the loop below we need to have either (a) unprocessed crawls s.t. their crawl times are after the current
# change time (but before the horizon), or (b) no such crawls, but the time horizon hasn't been reached yet.
while (((curr_crawl_time_idx >= len(crawl_hists[w]) or crawl_hists[w][curr_crawl_time_idx] > horizon) and ch_hists[w][curr_change_time_idx] <= horizon) or \
(curr_crawl_time_idx < len(crawl_hists[w]) and crawl_hists[w][curr_crawl_time_idx] <= horizon and ch_hists[w][curr_change_time_idx] <= crawl_hists[w][curr_crawl_time_idx])):
# If the time of a crawl coincides with the time of a change exactly (this can happen in the case of sources with complete
# change observations), don't count this change -- we assume it is picked up immediately. Just advance to the next change
# time.
if (curr_crawl_time_idx >= len(crawl_hists[w]) or \
not(ch_hists[w][curr_change_time_idx] == crawl_hists[w][curr_crawl_time_idx])):
curr_num_changes += 1
if (curr_crawl_time_idx < len(crawl_hists[w]) and \
ch_hists[w][curr_change_time_idx] == crawl_hists[w][curr_crawl_time_idx]):
curr_change_time_idx += 1
break
curr_change_time_idx += 1
if (curr_change_time_idx >= len(ch_hists[w])):
break
# tally the changes we missed
running_penalty += (0 if curr_num_changes == 0 else Harmonic(curr_num_changes))
curr_num_changes = 0
curr_crawl_time_idx += 1
J_pi += (importances[w] * running_penalty / horizon)
return J_pi / len(importances)
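# Illustrative sketch (not part of the original experiments): EvalTrace on a hand-made single-source trace. The change times, crawl time,
# and horizon below are hypothetical. The source changes at times 1.0, 2.0, and 3.0 and is crawled once at time 2.5, so two changes are
# missed before the crawl and one more before the horizon of 4.0; the time-averaged penalty should be (Harmonic(2) + Harmonic(1)) / 4.0 = 0.625.
def _ExampleEvalTrace():
    print("Trace cost:", EvalTrace([1.0], [[1.0, 2.0, 3.0]], [[2.5]], 4.0))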
# Extends the change history for a set of sources from curr_time up to curr_time + horizon.
#
# To extend the history for a given source, samples change times from the source's Poisson change process with a given rate parameter.
def ExtendChangeHistory(ch_hists, ch_rates, curr_time, horizon):
for w in range(len(ch_hists)):
if (len(ch_hists[w]) == 0):
ch_hists[w].append(np.random.exponential(1.0 / ch_rates[w]))
while (ch_hists[w][-1] < curr_time + horizon):
ch_hists[w].append(ch_hists[w][-1] + np.random.exponential(1.0 / ch_rates[w]))
# Extends the observation history for sources with incomplete change observations.
#
# This method assumes that both the change and the crawl history for sources with incomplete observations have *already* been extended
# until (or just past) curr_time + horizon. This means that for each such source both its change history and its crawl history are
# assumed to contain at least one element.
#
# WARNING: DO NOT CALL THIS METHOD TWICE WITH THE SAME (curr_time, horizon) PAIR. Calling this method twice with the same (curr_time, horizon) may
# re-append existing observations and thereby invalidate the observation history.
def ExtendObsHistory_IncomplObs(ch_hists, crawl_hists, observations_incompl_obs, curr_time, horizon):
for w in range(len(observations_incompl_obs)):
# First, find the last crawl in the time interval of interest, [curr_time, curr_time + horizon]. It's possible that the latest
# scheduled crawl overall is after curr_time+horizon and therefore hasn't happened yet -- ignore it
i = -1
while (abs(i) <= len(crawl_hists[w]) and crawl_hists[w][i] > curr_time + horizon):
i -= 1
if (abs(i) > len(crawl_hists[w]) or crawl_hists[w][i] <= curr_time):
# No crawls happened during time interval [curr_time, curr_time + horizon], so no new observations
            continue
i_last_crawl_in_interval = i
# Now, find the first crawl in the interval [curr_time, curr_time + horizon]
while (abs(i) <= len(crawl_hists[w]) and crawl_hists[w][i] > curr_time):
i -= 1
# Go back to i before the last subtraction. That's the index of the first crawl timestamp in the interval
# [curr_time, curr_time + horizon]
i_first_crawl_in_interval = i + 1
# For each crawl between and including these two, we need to determine whether we would observe any changes since the previous
# crawl. Note that i_crawl is always non-positive
for i_crawl in range(i_first_crawl_in_interval, i_last_crawl_in_interval + 1):
previous_crawl_time = crawl_hists[w][i_crawl - 1] if (abs(i_crawl - 1) <= len(crawl_hists[w])) else 0
# Find the index of the timestamp of the latest change before the latest crawl
j = -1
while (abs(j) <= len(ch_hists[w]) and ch_hists[w][j] > crawl_hists[w][i_crawl]):
j -= 1
if (abs(j) > len(ch_hists[w])):
observations_incompl_obs[w].append((crawl_hists[w][i_crawl], 0))
else:
# If this timestamp is after the previous crawl time, we know that there has been at least one change since the previous
# crawl (although there may have been more -- we wouldn't be able to tell the difference based on this observation!)
# and record this in our observation history. Conversely, we also know that if this timestamp is before the previous crawl
# time, there could not have been any change since the previous crawl, so we would observe no changes
if (ch_hists[w][j] > previous_crawl_time):
observations_incompl_obs[w].append((crawl_hists[w][i_crawl], 1))
else:
observations_incompl_obs[w].append((crawl_hists[w][i_crawl], 0))
# Extends the crawl history for sources with complete change observations.
#
# This method assumes that the change history has *already* been extended up to curr_time + horizon
# For every change in the interval [curr_time, curr_time + horizon] it then decides whether to crawl
# at that change's time or not.
#
# WARNING! DO NOT CALL THIS METHOD TWICE WITH THE SAME (curr_time, horizon) PAIR. Calling this method twice on the same change history for the same
# curr_time, horizon will resample a crawl for every change and append the sampled timestamps onto the crawl history, thereby invalidating it.
def ExtendCrawlHistory_ComplObs(ch_hists, crawl_hists, crawl_probs_compl_obs, curr_time, horizon):
for w in range(len(crawl_hists)):
# Find the index of the first change timestamp in [curr_time, curr_time + horizon]
i = 0
while ((abs(i - 1) <= len(ch_hists[w])) and ch_hists[w][i - 1] > curr_time):
i -= 1
while (i < 0 and ch_hists[w][i] <= curr_time + horizon):
if (np.random.binomial(1, crawl_probs_compl_obs[w]) == 1):
crawl_hists[w].append(ch_hists[w][i])
i += 1
# Extends the crawl history for sources with incomplete change observations.
#
# This method assumes that the change history has *already* been extended up to curr_time + horizon
#
# WARNING! DO NOT CALL THIS METHOD TWICE WITH THE SAME (curr_time, horizon) PAIR. Calling this method twice on the same change history for the same
# curr_time, horizon will resample crawls and append the sampled timestamps onto the crawl history, thereby invalidating it.
def ExtendCrawlHistory_IncomplObs(crawl_hists, crawl_rates_incompl_obs, curr_time, horizon):
for w in range(len(crawl_hists)):
if (not crawl_hists[w]):
crawl_hists[w].append(np.random.exponential(1.0 / crawl_rates_incompl_obs[w]))
else:
# Remove all crawl times, sampled using a previous crawl rate, that are scheduled after the first timestamp
# sampled using the current crawl rate
first_sample = curr_time + np.random.exponential(1.0 / crawl_rates_incompl_obs[w])
while (crawl_hists[w] and crawl_hists[w][-1] > first_sample):
del crawl_hists[w][-1]
crawl_hists[w].append(first_sample)
while (curr_time + horizon > crawl_hists[w][-1]):
crawl_hists[w].append(crawl_hists[w][-1] + np.random.exponential(1.0 / crawl_rates_incompl_obs[w]))
# Pre-processes the dataset from the NeurIPS-2019 paper (https://github.com/microsoft/Optimal-Freshness-Crawl-Scheduling, see the README for the
# dataset format) for consumption by the scheduling algorithms. Produces two files, one for URLs with complete and one for URLs with incomplete
# change observations. Each file has the tab-separated format
#
# Imp_1 ChangeRate_1
# ... ...
# Imp_N ChangeRate_N
#
# where Imp_i and ChangeRate_i are the importance score and change rate of some URL. For complete-change-observation URLs both of these pieces
# of data are already in the dataset itself. For incomplete-change-observation URLs the change rates need to be learned from the dataset's crawl
# and observation history.
def ProcessRawData(imps_data_file, changes_data_file, change_rates_compl_obs_data_file, \
out_incompl_obs_file="imps_and_chrates_incompl.txt", out_compl_obs_file="imps_and_chrates_compl.txt", delimiter='\t'):
# Dictionary format: key is the URL ID, value is a triplet [importance, change rate, flag with value 1 if the URL has a complete observation
# history available and 0 otherwise]
url_data = dict()
print("Reading the importance scores data...")
with open(imps_data_file) as imps_data:
line = imps_data.readline()
while line:
tokens = line.strip().split(delimiter, 1)
url_data[int(tokens[0])] = [int(tokens[1]), 0, 0]
line = imps_data.readline()
print("Learning the change rates for URLs with incomplete change observations...")
    num_high_chrates = 0
with open(changes_data_file) as changes_data:
line = changes_data.readline()
num_lines = 1
while line:
tokens = line.strip().split(delimiter, 2)
training_intervals = np.array(json.loads(tokens[2]))
# This is a reference to training_intervals, so operations on it will change training_intervals's contents!
training_timestamps = training_intervals
for i in range(1, training_timestamps.shape[0]):
training_timestamps[i, 0] += training_timestamps[i - 1, 0]
# Here we apply Equation 10 from the NeurIPS-2019 paper to learn URLs' Poisson change rates from histories.
learned_rate = LearnChRates_IncomplObs(training_timestamps, math.inf, sum(training_timestamps[:,0]), epsilon_learn_global)
url_data[int(tokens[0])][1] = learned_rate
if (num_lines % 10000 == 0):
print("Processed {} URLs".format(num_lines))
num_lines += 1
line = changes_data.readline()
print("Processing the change rates for URLs with complete change observations...")
with open(change_rates_compl_obs_data_file) as change_rates_compl_obs_data:
line = change_rates_compl_obs_data.readline()
while line:
tokens = line.strip().split(delimiter, 1)
id = int(tokens[0])
url_data[id][1] = float(tokens[1])
url_data[id][2] = 1
line = change_rates_compl_obs_data.readline()
print("Outputting data to files...")
incompl_obs_out = open(out_incompl_obs_file,"w+")
num_incompl_obs_records = 0
compl_obs_out = open(out_compl_obs_file,"w+")
num_compl_obs_records = 0
for record in url_data.values():
if (record[2] == 0):
incompl_obs_out.write("%d\t%f\n" % (record[0], record[1]))
num_incompl_obs_records += 1
elif (record[2] == 1):
compl_obs_out.write("%d\t%f\n" % (record[0], record[1]))
num_compl_obs_records += 1
    incompl_obs_out.close()
    compl_obs_out.close()
    print("A total of {} records written, {} for incomplete- and {} for complete-change-observation URLs.".format(num_incompl_obs_records + num_compl_obs_records, num_incompl_obs_records, num_compl_obs_records))
# This method computes the policy and its harmonic and binary costs for LambdaCrawl, LambdaCrawlApprox, BinaryLambdaCrawl(0.0),
# BinaryLambdaCrawl(0.4), UniformCrawl, and ChangeRateProportionalCrawl, assuming known change rates. The inputs are 2D arrays containing importance-
# change rate pairs for incomplete- and complete-change-observation sources.
def ExperimentHelper(importances_and_ch_rates_incompl_obs, importances_and_ch_rates_compl_obs):
# We can choose to subsample and experiment with a subset of the whole dataset.
size = importances_and_ch_rates_incompl_obs.shape[0] + importances_and_ch_rates_compl_obs.shape[0]
# Determine the fraction of the overall dataset to sample in the current iteration
sample_frac = size / (importances_and_ch_rates_incompl_obs.shape[0] + importances_and_ch_rates_compl_obs.shape[0])
if (sample_frac >= 1.0):
importances_and_ch_rates_incompl_obs_partial = importances_and_ch_rates_incompl_obs
importances_and_ch_rates_compl_obs_partial = importances_and_ch_rates_compl_obs
else:
# Sample this fraction of a subset of the sources with incomplete change observations
if (importances_and_ch_rates_incompl_obs.shape[0] > 0):
importances_and_ch_rates_incompl_obs_partial = importances_and_ch_rates_incompl_obs[np.random.choice( \
importances_and_ch_rates_incompl_obs.shape[0], int(sample_frac * importances_and_ch_rates_incompl_obs.shape[0]), replace=False),:]
else:
importances_and_ch_rates_incompl_obs_partial = np.empty([0,2])
# Sample this fraction of a subset of the sources with complete change observations
if (importances_and_ch_rates_compl_obs.shape[0] > 0):
importances_and_ch_rates_compl_obs_partial = importances_and_ch_rates_compl_obs[np.random.choice( \
importances_and_ch_rates_compl_obs.shape[0], int(sample_frac * importances_and_ch_rates_compl_obs.shape[0]), replace=False),:]
else:
importances_and_ch_rates_compl_obs_partial = np.empty([0,2])
importances_incompl_obs = importances_and_ch_rates_incompl_obs_partial[:,0]
ch_rates_incompl_obs = importances_and_ch_rates_incompl_obs_partial[:,1]
importances_compl_obs = importances_and_ch_rates_compl_obs_partial[:,0]
ch_rates_compl_obs = importances_and_ch_rates_compl_obs_partial[:,1]
bandwidth = 0.2 * (importances_and_ch_rates_incompl_obs_partial.shape[0] + importances_and_ch_rates_compl_obs_partial.shape[0])
epsilon_incompl_obs = 0.1
importances = np.concatenate((importances_and_ch_rates_incompl_obs_partial[:,0], importances_and_ch_rates_compl_obs_partial[:,0]))
ch_rates = np.concatenate((importances_and_ch_rates_incompl_obs_partial[:,1], importances_and_ch_rates_compl_obs_partial[:,1]))
print("\n*****Running on a set of ", importances_and_ch_rates_incompl_obs_partial.shape[0], \
" sources with incomplete observations and ", importances_and_ch_rates_compl_obs_partial.shape[0], \
" sources with complete observations.", sep='')
#==================================================================================
# Random
print("\n*****Running UniformCrawl.")
crawl_rates = np.ones_like(importances) * bandwidth / len(importances)
policy_cost_harmonic = HarmonicPolicyCost(crawl_rates, [], importances, ch_rates, [], [])
policy_cost_binary = BinaryPolicyCost(crawl_rates, [], importances, ch_rates, [], [])
print("\tHarmonic policy cost was ", policy_cost_harmonic, ".\n\tBinary policy cost was ", policy_cost_binary, ".", sep='')
#===================================================================================
#==================================================================================
# Change-rate-proportional
print("\n")
print("*****Running ChangeRateProportionalCrawl on a set of ", importances_and_ch_rates_incompl_obs_partial.shape[0], \
" sources with incomplete observations and ", importances_and_ch_rates_compl_obs_partial.shape[0], \
" sources with complete observations.", sep='')
crawl_rates = ch_rates * bandwidth / sum(ch_rates)
policy_cost_harmonic = HarmonicPolicyCost(crawl_rates, [], importances, ch_rates, [], [])
policy_cost_binary = BinaryPolicyCost(crawl_rates, [], importances, ch_rates, [], [])
print("\tHarmonic policy cost was ", policy_cost_harmonic, ".\n\tBinary policy cost was ", policy_cost_binary, ".", sep='')
#===================================================================================
#===================================================================================
# Epsilon-lambda-crawl-binary with the best parameter value for harmonic-cost performance on the NeurIPS-2019 paper's dataset.
print("\n")
print("*****Running BinaryLambdaCrawl-Epsilon (**all sources are treated as having incomplete change observations**).")
epsilon = 0.4
crawl_rates_incompl_obs, crawl_probs_compl_obs = LambdaCrawlBinary_Epsilon(importances, ch_rates, epsilon, [], [], bandwidth)
policy_cost_harmonic = HarmonicPolicyCost(crawl_rates_incompl_obs, [], importances, ch_rates, [], [])
policy_cost_binary = BinaryPolicyCost(crawl_rates_incompl_obs, [], importances, ch_rates, [], [])
print("\tHarmonic policy cost with epsilon=", epsilon," was: ", policy_cost_harmonic, ".\n\tBinary policy cost with epsilon=", \
epsilon," was: ", policy_cost_binary, ".", sep='')
#===================================================================================
#===================================================================================
# Vanilla Epsilon-lambda-crawl-binary
print("\n")
print("*****Running BinaryLambdaCrawl-Epsilon (**all sources are treated as having incomplete change observations**).")
epsilon = 0.0
crawl_rates_incompl_obs, crawl_probs_compl_obs = LambdaCrawlBinary_Epsilon(importances, ch_rates, epsilon, [], [], bandwidth)
policy_cost_harmonic = HarmonicPolicyCost(crawl_rates_incompl_obs, [], importances, ch_rates, [], [])
policy_cost_binary = BinaryPolicyCost(crawl_rates_incompl_obs, [], importances, ch_rates, [], [])
print("\tHarmonic policy cost with epsilon=", epsilon," was: ", policy_cost_harmonic, ".\n\tBinary policy cost with epsilon=", \
epsilon," was: ", policy_cost_binary, ".", sep='')
#===================================================================================
#==================================================================================================
# LambdaCrawlApprox
print("\n")
print("*****Running LambdaCrawlApprox.")
crawl_rates_approx_incompl_obs, crawl_probs_approx_compl_obs = LambdaCrawlApprox(importances_incompl_obs, ch_rates_incompl_obs, \
epsilon_incompl_obs, importances_compl_obs, ch_rates_compl_obs, bandwidth)
policy_cost_harmonic = HarmonicPolicyCost(crawl_rates_approx_incompl_obs, crawl_probs_approx_compl_obs, importances_incompl_obs, \
ch_rates_incompl_obs, importances_compl_obs, ch_rates_compl_obs)
policy_cost_binary = BinaryPolicyCost(crawl_rates_approx_incompl_obs, crawl_probs_approx_compl_obs, importances_incompl_obs, \
ch_rates_incompl_obs, importances_compl_obs, ch_rates_compl_obs)
print("\tHarmonic policy cost was: ", policy_cost_harmonic, ".\n\tBinary policy cost was: ", policy_cost_binary, ".", sep='')
#===============================================================================================================================
#===============================================================================================================================
# LambdaCrawlApprox ignoring complete change observations
print("\n")
print("*****Running LambdaCrawlApprox (**all sources are treated as having incomplete change observations**).")
crawl_rates_approx_incompl_obs, crawl_probs_approx_compl_obs = LambdaCrawlApprox(importances, ch_rates, epsilon_incompl_obs, [], [], bandwidth)
policy_cost_harmonic = HarmonicPolicyCost(crawl_rates_approx_incompl_obs, crawl_probs_approx_compl_obs, importances, ch_rates, [], [])
policy_cost_binary = BinaryPolicyCost(crawl_rates_approx_incompl_obs, crawl_probs_approx_compl_obs, importances, ch_rates, [], [])
print("\tHarmonic policy cost was: ", policy_cost_harmonic, ".\n\tBinary policy cost was: ", policy_cost_binary, ".", sep='')
#===============================================================================================================================
#===============================================================================================================================
# LambdaCrawl
print("\n")
print("*****Running LambdaCrawl.")
crawl_rates_incompl_obs, crawl_probs_compl_obs = LambdaCrawl(importances_incompl_obs, ch_rates_incompl_obs, epsilon_incompl_obs, \
importances_compl_obs, ch_rates_compl_obs, bandwidth)
policy_cost_harmonic = HarmonicPolicyCost(crawl_rates_incompl_obs, crawl_probs_compl_obs, importances_incompl_obs, ch_rates_incompl_obs, \
importances_compl_obs, ch_rates_compl_obs)
policy_cost_binary = BinaryPolicyCost(crawl_rates_incompl_obs, crawl_probs_compl_obs, importances_incompl_obs, ch_rates_incompl_obs, \
importances_compl_obs, ch_rates_compl_obs)
print("\tHarmonic policy cost was: ", policy_cost_harmonic, ".\n\tBinary policy cost was: ", policy_cost_binary, ".", sep='')
#===============================================================================================================================
#===============================================================================================================================
# LambdaCrawl ignoring complete change observations
print("\n")
print("*****Running LambdaCrawl (**all sources are treated as having incomplete change observations**).")
crawl_rates_incompl_obs, crawl_probs_compl_obs = LambdaCrawl(importances, ch_rates, epsilon_incompl_obs, [], [], bandwidth)
policy_cost_harmonic = HarmonicPolicyCost(crawl_rates_incompl_obs, crawl_probs_compl_obs, importances, ch_rates, [], [])
policy_cost_binary = BinaryPolicyCost(crawl_rates_incompl_obs, crawl_probs_compl_obs, importances, ch_rates, [], [])
print("\tHarmonic policy cost was: ", policy_cost_harmonic, ".\n\tBinary policy cost was: ", policy_cost_binary, ".", sep='')
# Compute means and confidence intervals. Adapted from the Stack Overflow post at
# https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
def mean_confidence_interval(data, confidence = 0.95):
a = 1.0 * data
n = len(a)
m, se = np.mean(a), scipy.stats.sem(a)
h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
return m, h
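# Illustrative sketch (not part of the original experiments): mean_confidence_interval on a tiny hypothetical sample. For [1, 2, 3, 4] the mean
# is 2.5 and the 95% half-width, based on the t distribution with 3 degrees of freedom, is roughly 2.05.
def _ExampleMeanConfidenceInterval():
    m, h = mean_confidence_interval(np.array([1.0, 2.0, 3.0, 4.0]))
    print("Mean:", m, "95% CI half-width:", h)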
def process_results(runs):
ms = np.zeros_like(runs[0,:])
hs = np.zeros_like(ms)
for j in range(runs.shape[1]):
m, h = mean_confidence_interval(runs[:,j])
ms[j] = m
hs[j] = h
return ms, hs
# This method produces results as in Figure 1 of the NeurIPS-2019 paper. It computes the policy and its harmonic and binary costs for LambdaCrawl,
# LambdaCrawlApprox, BinaryLambdaCrawl(0.0), BinaryLambdaCrawl(0.4), UniformCrawl, and ChangeRateProportionalCrawl, assuming known change rates,
# on the full dataset from the NeurIPS-2019 paper.
def Experiment1(importances_and_ch_rates_incompl_obs_file = "imps_and_chrates_incompl.txt", \
importances_and_ch_rates_compl_obs_file="imps_and_chrates_compl.txt"):
print("Reading the data...")
importances_and_ch_rates_incompl_obs = np.genfromtxt(importances_and_ch_rates_incompl_obs_file, delimiter="\t")
importances_and_ch_rates_compl_obs = np.genfromtxt(importances_and_ch_rates_compl_obs_file, delimiter="\t")
ExperimentHelper(importances_and_ch_rates_incompl_obs, importances_and_ch_rates_compl_obs)
# This method produces results as in Figure 2 of the NeurIPS-2019 paper. It computes the policy and its harmonic and binary costs for LambdaCrawl,
# LambdaCrawlApprox, BinaryLambdaCrawl(0.0), and BinaryLambdaCrawl(0.4) on the set of URLs with complete observations only. Out of the above
# algorithms, only LambdaCrawl and LambdaCrawlApprox have special handling for these observations, the rest ignore them. For the sake of comparison,
# we also run LambdaCrawl and LambdaCrawlApprox in "ignorance mode" as well as in normal mode.
def Experiment2(importances_and_ch_rates_compl_obs_file="imps_and_chrates_compl.txt"):
print("Reading the data...")
importances_and_ch_rates_incompl_obs = np.empty([0,2])
importances_and_ch_rates_compl_obs = np.genfromtxt(importances_and_ch_rates_compl_obs_file, delimiter="\t")
ExperimentHelper(importances_and_ch_rates_incompl_obs, importances_and_ch_rates_compl_obs)
# This method produces results as in Figure 3 of the NeurIPS-2019 paper. It simulates runs of the RL algorithms LambdaLearnAndCrawl,
# LambdaLearnAndCrawlApprox, and BinaryLambdaLearnAndCrawl using the ground-truth Poisson change rates previously learned from the dataset,
# averages their results, and outputs the means and confidence intervals in a tikzpicture plot format.
def Experiment3(importances_and_ch_rates_incompl_obs_file = "imps_and_chrates_incompl.txt", \
importances_and_ch_rates_compl_obs_file="imps_and_chrates_compl.txt"):
print("Reading the data...")
importances_and_ch_rates_incompl_obs = np.genfromtxt(importances_and_ch_rates_incompl_obs_file, delimiter="\t")
importances_and_ch_rates_compl_obs = np.genfromtxt(importances_and_ch_rates_compl_obs_file, delimiter="\t")
size = 100000
# Determine the fraction of the overall dataset to sample
sample_frac = size / (importances_and_ch_rates_incompl_obs.shape[0] + importances_and_ch_rates_compl_obs.shape[0])
epoch_length = 1
horizon = 21
num_runs = 20
epsilon_incompl_obs = 0.1
LLC_runs = np.empty([num_runs,horizon])
LLCA_runs = np.empty([num_runs,horizon])
LLCB_runs = np.empty([num_runs,horizon])
for run in range(num_runs):
print("%%%%%% EPISODE ", run + 1, " %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
# Sample this fraction of the subset of the sources with incomplete change observations
importances_and_ch_rates_incompl_obs_partial = importances_and_ch_rates_incompl_obs[np.random.choice( \
importances_and_ch_rates_incompl_obs.shape[0], int(sample_frac * importances_and_ch_rates_incompl_obs.shape[0]), replace=False),:]
importances_incompl_obs = importances_and_ch_rates_incompl_obs_partial[:,0]
ch_rates_incompl_obs_actual = importances_and_ch_rates_incompl_obs_partial[:,1]
# Sample this fraction of the subset of the sources with complete change observations
importances_and_ch_rates_compl_obs_partial = importances_and_ch_rates_compl_obs[np.random.choice( \
importances_and_ch_rates_compl_obs.shape[0], int(sample_frac * importances_and_ch_rates_compl_obs.shape[0]), replace=False),:]
importances_compl_obs = importances_and_ch_rates_compl_obs_partial[:,0]
ch_rates_compl_obs_actual = importances_and_ch_rates_compl_obs_partial[:,1]
# Set the bandwidth constraint to 20\% of the total number of sources
bandwidth = 0.2 * (importances_and_ch_rates_incompl_obs_partial.shape[0] + importances_and_ch_rates_compl_obs_partial.shape[0])
# use the entire available observation history for learning
suffix_len = math.inf
policy_cost, total_time, idealized_policy_costs_harmonic, idealized_policy_costs_binary = LambdaLearnAndCrawl(importances_incompl_obs, ch_rates_incompl_obs_actual , epsilon_incompl_obs, \
importances_compl_obs, ch_rates_compl_obs_actual, bandwidth, epsilon_learn_global, epoch_length, suffix_len, horizon)
LLC_runs[run,:] = idealized_policy_costs_harmonic[:]
print("LambdaLearnAndCrawl's harmonic costs: ", idealized_policy_costs_harmonic)
print("\n")
policy_cost_llaca, total_time_llaca, idealized_policy_costs_llaca_harmonic, idealized_policy_costs_llaca_binary = LambdaLearnAndCrawlApprox(\
importances_incompl_obs, ch_rates_incompl_obs_actual , epsilon_incompl_obs, \
importances_compl_obs, ch_rates_compl_obs_actual, bandwidth, epsilon_learn_global, epoch_length, suffix_len, horizon)
        LLCA_runs[run,:] = idealized_policy_costs_llaca_harmonic[:]
print("LambdaLearnAndCrawlApprox's harmonic costs: ", idealized_policy_costs_llaca_harmonic)
print("\n")
policy_cost_llcb, total_time_llcb, idealized_policy_costs_llcb_harmonic, idealized_policy_costs_llcb_binary = LambdaLearnAndCrawlBinary(\
importances_incompl_obs, ch_rates_incompl_obs_actual , 0.4, \
importances_compl_obs, ch_rates_compl_obs_actual, bandwidth, epsilon_learn_global, epoch_length, suffix_len, horizon)
        LLCB_runs[run,:] = idealized_policy_costs_llcb_harmonic[:]
        print("BinaryLambdaLearnAndCrawl's harmonic costs: ", idealized_policy_costs_llcb_harmonic)
print("\n")
LLC_ms, LLC_hs = process_results(LLC_runs)
LLC_plot_string = ""
for i in range(LLC_ms.shape[0]):
LLC_plot_string += "(" + str(i) + "," + str(LLC_ms[i]) + ")+=(0," + str(LLC_hs[i]) + ")-=(0," + str(LLC_hs[i]) +")\n"
    print("LLC:\n", LLC_plot_string)
LLCA_ms, LLCA_hs = process_results(LLCA_runs)
LLCA_plot_string = ""
for i in range(LLCA_ms.shape[0]):
LLCA_plot_string += "(" + str(i) + "," + str(LLCA_ms[i]) + ")+=(0," + str(LLCA_hs[i]) + ")-=(0," + str(LLCA_hs[i]) +")\n"
    print("LLCA:\n", LLCA_plot_string)
LLCB_ms, LLCB_hs = process_results(LLCB_runs)
LLCB_plot_string = ""
for i in range(LLCB_ms.shape[0]):
LLCB_plot_string += "(" + str(i) + "," + str(LLCB_ms[i]) + ")+=(0," + str(LLCB_hs[i]) + ")-=(0," + str(LLCB_hs[i]) +")\n"
    print("BLLC:\n", LLCB_plot_string)