

/*
    Damon's estimator for bunching:

    1. work with binned data; bins should go ... [z^*-delta, z^*), [z^*,z^*+delta),...
    2. Estimate the following regression:

    Bin Count =     (polynomial of degree D in z)*1{z < z^*}
                +   (polynomial of degree D in z)*1{z >= z^*}
                +  dummies for each z in some excluded region

    Excess mass at kink =   - sum of dummies.
                            - easy to compute standard errors via delta method.
    Report: excess mass standardized by ex post density near the kink,
        i.e.  (excess mass)/(SUM of predicted mass - excess mass)

    Also report excess mass at kink
    
    ASSUMES EARNINGS ARE AT THE MIDPOINT OF THE CELL,
    SO THAT BINS (kink-bw/2), (kink+bw/2) ARE IN THE DATA 
    
*/



/*
    bunchPoly: polynomial bunching estimator on binned data.

    Usage:
        bunchPoly earnings prob [if], kink(#) bw(#) exclude(#) degree(#)
                                      [keep(#) print(#) nboot(#)]

    Arguments:
        earnings    first variable in varlist: position of each bin
        prob        second variable in varlist: mass (count or share) of each bin
        kink        location of the kink; positions are re-centred on it
        bw          bin width; consecutive bins must be exactly bw apart
        exclude     number of bins excluded on EACH side of the kink bin
                    (2*exclude + 1 dummies in total, at kink + j*bw)
        degree      degree of the single polynomial in (earnings - kink)
        keep        non-zero: (re)create the variables fit and smoothDensity
        print       non-zero: display a three-line summary of the estimates
        nboot       number of residual-bootstrap replications (default 100)

    What it does:
        regresses prob on the polynomial terms plus one dummy per excluded bin.
        Excess mass is the sum of the dummy coefficients, the counterfactual
        probability is the constant (fitted polynomial at the kink), and the
        scaled mass is their ratio. Standard errors come from lincom/nlcom and
        from the standard deviation of the bootstrap replications.

    Returns (r-class scalars):
        excessMass  excessMassSE  excessMassBS
        cfProb      cfProbSE      cfProbBS
        scaledMass  scaledMassSE  scaledMassBS

    Exit codes:
        1  prob missing            2  earnings missing
        3  holes in the bin grid   4  too few bins for the model
        6  degree < 1              7  exclude < 1

    Side effects: the data are sorted during estimation. The previous sort
    variables are re-applied on exit when there were any; otherwise a warning
    is shown and the data are left sorted by sample flag and bin position.
*/
capture program drop bunchPoly
program define bunchPoly, rclass
    # delimit ;
    syntax varlist (min=2 max=2) [if], 
        /* mandatory stuff */
        kink(real)                      /* location of kink */
        bw(real)                        /* size of bins */
        exclude(int)                    /* number of bins to the left and right that we exclude */
        degree(int)                     /* degree of the polynomial */
        
        [keep(int 0)]                   /* optional: zero to drop non-bunch mass, otherwise keep */
        [print(int 0)]                  /* optional: zero not to print results, otherwise print */
        [nboot(int 100)]                /* optional: number of bootstrap samples; setting to zero would be bad, but 1 is ok */
    ;
    # delimit cr
    local earnings:     word 1 of `varlist'
    local prob:         word 2 of `varlist'

    /* one tolerance for every comparison of bin positions, so the grid check */
    /* and the excluded-bin dummies can never disagree about where a bin is   */
    local tol 1e-7
    
    marksample touse
    quietly{

    
    /*******************************************************************************************************************/
    /* error checks: count and earnings non-missing, no holes in earnings distribution, sufficient data to estimate on */
    /*******************************************************************************************************************/

        /* positions relative to the kink; double so that subtracting the kink */
        /* and comparing against multiples of bw is not limited by float precision */
        tempvar earnShift
        gen double `earnShift' = `earnings' - `kink' if `touse'
        
        /* check 1: prob non-missing (defensive: marksample already marks out */
        /* observations with missing values in varlist)                       */
        count if `touse' & missing(`prob') 
        local n = r(N)
        if `n' > 0{
            noisily di "Error: `n' observations missing probability variable"
            exit 1
        }

        /* check 2: earnings non-missing */
        count if `touse' & missing(`earnShift')
        local n = r(N)
        if `n' > 0{
            noisily di "Error: `n' observations missing earnings variable"
            exit 2
        }
        
        /* check 3: no holes in earnings distribution */
        /* remember the current sort variables so they can be re-applied at the end */
        local sortList : sortedby
        /* estimation sample first, ordered by position: the bootstrap below */
        /* indexes residuals by row number and depends on this ordering      */
        gsort -`touse' `earnShift'
        tempvar trouble
        gen byte `trouble' = ~( (abs(`earnShift' - `earnShift'[_n-1]-`bw')<`tol') | (_n==1) | !`touse')
        count if `trouble'
        local nt = r(N)
        if `nt' > 0 {
            noisily di "Error: there are missing observations in the income distribution (`nt' Problems)"
            exit 3
        }
        
        /* check 4: sufficient data to estimate on */
        count if `touse' 
        local nBins = r(N)

        /* the regression estimates a constant, degree polynomial terms and */
        /* 2*exclude+1 dummies; at least one residual degree of freedom is  */
        /* needed for standard errors and for the residual bootstrap        */
        local nParams = `degree' + 2*`exclude' + 2
        if `nBins' <= `nParams'{
            noisily di "Error: there are `nBins' bins, but more than `nParams' are required"
            exit 4        
        }
        
        /* check 5: d, exclude >= 1 */
        if `degree' < 1{
            noisily di "Error: degree of polynomial must be at least 1"
            exit 6
        }
        
        if `exclude'<1{
            noisily di "Error: at least one bin must be exluded"
            exit 7
        }

    /*******************************************************************************************************************/
    /* create variables used in estimation                                                                             */
    /*******************************************************************************************************************/
                
        /* polynomial terms, dummies */
        local degrees ""
        forvalues deg = 1/`degree'{
        
            tempvar d`deg'
            gen double `d`deg'' = `earnShift'^(`deg')
            local degrees "`degrees' `d`deg''"
        }
        
        local n = 0
        local nExclude = 1+2*`exclude'
        local excludeList ""
        local addEM ""
    
        forvalues exc = -`exclude'/`exclude'{
            local ++n
            tempvar exc`n'
            /* tolerance match: exact equality fails for bin widths that are */
            /* not exactly representable, for example 0.1                    */
            gen byte `exc`n'' = (abs(`earnShift' - (`exc')*`bw') < `tol')
            local excludeList "`excludeList' `exc`n''"

            /* an empty dummy is dropped by the regression and contributes */
            /* zero to the excess mass, so make that visible               */
            count if `exc`n'' & `touse'
            if r(N) == 0 {
                noisily di "Warning: no bin found at kink + (`exc')*bw; its dummy is empty and adds nothing to excess mass"
            }
            
            /* build the expression that sums the dummy coefficients */
            if `n' == 1 local addEM "_b[`exc`n'']"
            if `n' >= 2 local addEM "`addEM'+_b[`exc`n'']" 
        }
        
    /*******************************************************************************************************************/
    /* run regressions, calculate main statistics of interesting                                                       */
    /*******************************************************************************************************************/
        
        /* regression */
        noi reg `prob' `degrees' `excludeList' if `touse'
        tempvar res fit
        predict `res' if `touse', res
        predict `fit' if `touse', xb
        
        /* record cfProbability */
        local cfProb = _b[_cons]
        local cfProbSE = _se[_cons]
        
        /* excess mass, scaled excess mass */
        noi lincom `addEM'
        local excessMass = r(estimate)
        local excessMassSE = r(se)
        
        /* post replaces the regression results in e() with the nlcom results */
        nlcom (scaledMass: (`addEM')/_b[_cons] ), post
        local scaledMass = _b[scaledMass]
        local scaledMassSE = _se[scaledMass]
        
        /* if keep, make a variable containing the smooth density */
        if `keep'{
            /* exact-name test: an abbreviation match must not cause an */
            /* unrelated variable (for example fitted) to be dropped    */
            foreach v in smoothDensity fit {
                capture confirm variable `v', exact
                if _rc == 0{
                    noisily di "Warning: replacing `v'"
                    drop `v'
                }
            }
            
            /* refit: the regression coefficients were overwritten by nlcom, post */
            reg `prob' `degrees' `excludeList' if `touse'
            predict fit if `touse', xb
            gen smoothDensity = fit
            
            /* remove the excluded-bin dummy effects to get the smooth density */
            forvalues n = 1/`nExclude'{
                replace smoothDensity = smoothDensity - _b[`exc`n'']*(`exc`n'') if `touse'
            }
        }
        
        /* implement CFOP-style bootstrap, resampling from residuals */
        tempvar probBS drawFrom cfBS emBS scaledBS origObs
        gen `probBS' = .
        gen `drawFrom' = .
        gen `cfBS' = .
        gen `emBS' = .
        gen `scaledBS' = .
        
        count if `touse'
        local N = r(N)
        
        count 
        local nTotal = r(N)

        /* replication results are stored one per row, so pad the data when  */
        /* there are more replications than rows; tag the original rows so   */
        /* the padding can be removed reliably afterwards                    */
        gen byte `origObs' = 1
        if `nboot'>`nTotal' {
            set obs `nboot'
            /* padding rows have a missing sample flag, which an if-qualifier */
            /* would treat as true                                           */
            replace `touse' = 0 if missing(`touse')
        }
        forvalues bs = 1/`nboot'{
        
            /* resample--note that sort order (and in particular that touse is first) is important */
            replace `drawFrom' = ceil(`N'*uniform()) if `touse'
            replace `probBS' = `fit'+`res'[`drawFrom'] if `touse'
            
            /* regression, save results */
            reg `probBS' `degrees' `excludeList' if `touse'
            replace `cfBS' = _b[_cons] in `bs'
            replace `emBS' = `addEM' in `bs'
            replace `scaledBS' = (`addEM')/_b[_cons] in `bs'
            
        }

        sum `emBS'
        local excessMassBS = r(sd)
        sum `cfBS'
        local cfProbBS = r(sd)
        sum `scaledBS' 
        local scaledMassBS = r(sd)

        /* remove padding rows by tag rather than by position: after sorting, */
        /* padding rows could tie with real rows whose sort keys are missing  */
        if `nboot'>`nTotal' drop if missing(`origObs')

        /* preserve order, etc */
        if "`sortList'" != "" sort `sortList'
        if "`sortList'" == "" noisily di "Warning: data unsorted before, now sorted"
        if "`sortList'" == "" noisily di "         If you run bunchPoly on sorted data, the sort order will be preserved."
        
    /*******************************************************************************************************************/
    /* print, if option is specified                                                                                   */
    /*******************************************************************************************************************/
        
        if `print'{
        
            local emSTR = string(`excessMass', "%9.4f")
            local emSESTR = string(`excessMassSE', "%9.4f")
            local emBSSTR = string(`excessMassBS', "%9.4f")
            
            local cfSTR = string(`cfProb', "%9.4f")
            local cfSESTR = string(`cfProbSE', "%9.4f")
            local cfBSSTR = string(`cfProbBS', "%9.4f")
            
            local smSTR = string(`scaledMass', "%9.4f")
            local smSESTR = string(`scaledMassSE', "%9.4f")
            local smBSSTR = string(`scaledMassBS', "%9.4f")
            
            noisily di "Excess mass: `emSTR' (Delta method SE: `emSESTR' ; bootstrap SE: `emBSSTR')"
            noisily di "Counterfactual probability: `cfSTR' (Delta method SE: `cfSESTR' ; bootstrap SE: `cfBSSTR')"
            noisily di "Excess mass scaled: `smSTR' (Delta method SE: `smSESTR' ; bootstrap SE: `smBSSTR')"
        }
        
    /*******************************************************************************************************************/
    /* return everything                                                                                               */
    /*******************************************************************************************************************/
        
        
        return scalar excessMass = `excessMass'
        return scalar excessMassSE = `excessMassSE'
        return scalar excessMassBS = `excessMassBS'
        
        return scalar cfProb = `cfProb'
        return scalar cfProbSE = `cfProbSE'
        return scalar cfProbBS = `cfProbBS'
        
        return scalar scaledMass = `scaledMass'
        return scalar scaledMassSE = `scaledMassSE'
        return scalar scaledMassBS = `scaledMassBS'       
        
    } /* close quietly */  
end




