//========================================================= file = bfSim.c =====
//= Simulation to determine Pr[false positive] for a Bloom filter              =
//==============================================================================
//= Notes:                                                                     =
//=   1) Uses an RNG to generate hashes to map into Bloom filter               =
//=   2) Need to set NUM_EXPER, M, N, and K in #define section                 =
//=   3) Note that M must be less than MAX in #define section                  =
//=   4) Computes Pr[false positive] in three different ways                   =
//=       - From theory formula                                                =
//=       - As a running sum of sample of number of 1s set (experiment #1)     =
//=       - As a running sum of sample Pr[false positives] (experiment #2)     =
//=----------------------------------------------------------------------------=
//= Example execution:                                                         =
//=   Running...                                                               =
//=   ================================================== bfSim.c =====         =
//=   = Number of experiments = 1000000                                        =
//=   = m = 32                                                                 =
//=   = n = 8                                                                  =
//=   = k = 4                                                                  =
//=   =---------------------------------------------------------------         =
//=   = Mean number of 1s = 20.412851000000000                                 =
//=   = Pr[false positive] from theory   = 0.165627392232887                   =
//=   = Pr[false positive] experiment #1 = 0.165582619504849                   =
//=   = Pr[false positive] experiment #2 = 0.173061714079857                   =
//=   ================================================================         =
//=----------------------------------------------------------------------------=
//=  Build: bcc32 bfSim.c                                                      =
//=----------------------------------------------------------------------------=
//=  Execute: bfSim                                                            =
//=----------------------------------------------------------------------------=
//=  Author: Ken Christensen                                                   =
//=          University of South Florida                                       =
//=          WWW: http://www.csee.usf.edu/~christen                            =
//=          Email: christen@cse.usf.edu                                       =
//=----------------------------------------------------------------------------=
//=  History: KJC (04/11/09) - Genesis (from bon1.c)                           =
//=           KJC (04/19/09) - Did a minor clean-up                            =
//==============================================================================
//----- Include files ----------------------------------------------------------
#include <stdio.h>                 // Needed for printf()
#include <math.h>                  // Needed for pow()

//----- Constant defines -------------------------------------------------------
#define        MAX        100000   // Maximum number of bits in the Bloom filter
#define  NUM_EXPER       1000000   // Number of experiments to run
#define          M            32   // Bloom filter m value
#define          N             8   // Bloom filter n value
#define          K             4   // Bloom filter k value

//----- Function prototypes ----------------------------------------------------
unsigned int hash(void);           // Dummy function for a "string hash"
unsigned int randInt(int seed);    // LCG from Jain

//==============================================================================
//=  Main program                                                              =
//==============================================================================
void main(void)
{
  unsigned int bloom[MAX];         // The Bloom filter
  int          index;              // Bloom filter index value
  int          oneCount;           // Count of number of 1s in a Bloom filter
  int          sumOneCount;        // Sum of 1s counts
  double       pf;                 // Indivisual Pr[false positive]
  double       sumPf;              // Sum of individual Pr[false positive]
  double       meanNumOnes;        // Mean number of 1s in a Bloom filter
  double       prFalse1;           // Prob of false positive from exper (1)
  double       prFalse2;           // Prob of false positive from exper (2)
  double       prFalseTheory;      // Prob of false positive from theory
  int          i,j,k;              // Loop counters

  // Seed the RNG
  randInt(1);

  // Loop for number of experiments to run
  printf("Running... \n");
  sumOneCount = sumPf = 0;
  for (i=0; i<NUM_EXPER; i++)
  {
    // Clear the Bloom filter
    for (j=0; j<M; j++)
      bloom[j] = 0;

    // Map the Bloom filter
    for (j=0; j<N; j++)
      for (k=0; k<K; k++)
      {
        index = hash();
        bloom[index % M] = 1;
      }

    // Count the number of 1s in the Bloom filter
    oneCount = 0;
    for (j=0; j<M; j++)
      if (bloom[j] == 1)
        oneCount++;

    // Add-up sumOneCount
    sumOneCount = sumOneCount + oneCount;

    // Compute this Pr[false positive]
    pf = pow(((double) oneCount / M), K);

    // Add-up sumPf
    sumPf = sumPf + pf;
  }

  // Compute mean number of 1s and then the exprimental Pr[false positive]
  meanNumOnes = (double) sumOneCount / NUM_EXPER;
  prFalse1 = pow(((double) meanNumOnes / M), K);
  prFalse2 = sumPf / NUM_EXPER;

  // Compute the theory Pf[false positive]
  prFalseTheory = pow((1.0 - pow(1.0 - (1.0 / M), (K*N))), K);

  // Output parameters and results
  printf("================================================== bfSim.c ===== \n");
  printf("= Number of experiments = %d \n", NUM_EXPER);
  printf("= m = %d \n", M);
  printf("= n = %d \n", N);
  printf("= k = %d \n", K);
  printf("=--------------------------------------------------------------- \n");
  printf("= Mean number of 1s = %17.15f \n", meanNumOnes);
  printf("= Pr[false positive] from theory   = %17.15f \n", prFalseTheory);
  printf("= Pr[false positive] experiment #1 = %17.15f \n", prFalse1);
  printf("= Pr[false positive] experiment #2 = %17.15f \n", prFalse2);
  printf("================================================================ \n");
}

//==============================================================================
//= Dummy function for a string hash                                           =
//==============================================================================
unsigned int hash(void)
{
  unsigned int hashValue;          // Returned hash value

  // Generate a synthetic hash value using randInt()
  hashValue = randInt(0);

  // Return the hash value
  return(hashValue);
}

//==============================================================================
//= Multiplicative LCG from Jain                                               =
//==============================================================================
unsigned int randInt(int seed)
{
  const long  a =      16807;  // Multiplier
  const long  m = 2147483647;  // Modulus
  const long  q =     127773;  // m div a
  const long  r =       2836;  // m mod a
  static long x;               // Random int value
  long        x_div_q;         // x divided by q
  long        x_mod_q;         // x modulo q
  long        x_new;           // New x value

  // Set the seed value
  if (seed != 0)
    x = seed;

  // RNG using integer arithmetic
  x_div_q = x / q;
  x_mod_q = x % q;
  x_new = (a * x_mod_q) - (r * x_div_q);
  if (x_new > 0)
    x = x_new;
  else
    x = x_new + m;

  // Return a random unsigned integer
  return(x);
}

