Sales Tax Revenue

This code develops a database of Texas local government sales tax revenue.


Libraries and BLS api key handling


#### You need to get an api key from BLS.

library(RSocrata); library(tidyverse); library(RCurl); library(dplyr);
library(readr); library(dplyr); library(blsAPI); library(runner);
library(rjson); library(blscrapeR); library(skimr); library(zoo)
#set_bls_key("your-bls-api-key-goes-here")
readRenviron("~/.Renviron")
#Sys.getenv("BLS_KEY")


Older sales tax revenue

Sales tax revenue comes from my github, where I saved the csv files I received via email from the Comptroller’s Office.

url1 <- "https://raw.githubusercontent.com/skipkrueger/Data/master/TxLocalSalesTax1.csv"
otx1 <- read_csv(url(url1))
url2 <- "https://raw.githubusercontent.com/skipkrueger/Data/master/TxLocalSalesTax2.csv"
otx2 <- read_csv(url(url2))
otx3 <- rbind(otx1,otx2)
names(otx3)[names(otx3) == "TA ID"] <- "taxid"
names(otx3)[names(otx3) == "TAX AUTHORITY NAME"] <- "oldname"
names(otx3)[names(otx3) == "ALLOCATION MONTH"] <- "date"
names(otx3)[names(otx3) == "NET PAYMENT"] <- "amt"
otx3$type <- substr(otx3$taxid,1,1)
otx3 <- subset(otx3, type == 2)
otx3 <- filter(otx3, oldname!="OAK RIDGE")
otx3$city <- if_else(otx3$taxid==2139059,"RENO (LAMAR CO.)",otx3$oldname)
otx3$city <- if_else(otx3$taxid==2184062,"RENO (PARKER CO.)",otx3$oldname)
otx3 <- subset(otx3,select = -c(oldname,type,taxid))
otx3$date <- as.Date(otx3$date,"%m/%d/%Y")   # NOTE THE DATE FORMAT
head(otx3)
## # A tibble: 6 x 3
##   date           amt city     
##   <date>       <dbl> <chr>    
## 1 1990-01-01 107681. PALESTINE
## 2 1990-02-01 304147. PALESTINE
## 3 1990-03-01 123569. PALESTINE
## 4 1990-04-01 121508. PALESTINE
## 5 1990-05-01 266375. PALESTINE
## 6 1990-06-01 156792. PALESTINE


Latest sales tax revenue

New sales tax revenue comes from the Texas data hub. And some clean-up.

ntx <- read.socrata("https://data.texas.gov/resource/vfba-b57j.csv")
ntx <- subset(ntx, report_year==2020)
ntx <- subset(ntx, report_month!=3)
ntx <- subset(ntx, report_month!=2)
ntx <- subset(ntx, report_month!=1)
names(ntx)[names(ntx) == "net_payment_this_period"] <- "amt"
names(ntx)[names(ntx) == "report_month"] <- "month"
names(ntx)[names(ntx) == "report_year"] <- "year"
ntx$date <- paste(ntx$month,"/","1","/",ntx$year)      # NOTE THE DATE FORMAT
ntx$date <- gsub(" ","",ntx$date)                      # NOTE THE DATE FORMAT
ntx$date <- as.Date(ntx$date,"%m/%d/%Y")               # NOTE THE DATE FORMAT
ntx <- ntx %>% mutate_each(funs(toupper),city)
ntx <- subset(ntx, select = -c(comparable_payment_prior_year,
                               period_percent_change,
                               payments_to_date,
                               previous_payments_to_date,
                               ytd_percent_change,
                               report_period_type,month,year))
txall <- rbind(otx3,ntx)

txall <- txall %>%
  group_by(city) %>%
  mutate(lead = lead(amt,2))
#rm(url1,url2,otx1,otx2,otx3,ntx)        #OPTIONALLY, THE ENV CAN BE CLEANED UP
head(txall)
## # A tibble: 6 x 4
## # Groups:   city [1]
##   date           amt city         lead
##   <date>       <dbl> <chr>       <dbl>
## 1 1990-01-01 107681. PALESTINE 123569.
## 2 1990-02-01 304147. PALESTINE 121508.
## 3 1990-03-01 123569. PALESTINE 266375.
## 4 1990-04-01 121508. PALESTINE 156792.
## 5 1990-05-01 266375. PALESTINE 142217.
## 6 1990-06-01 156792. PALESTINE 283100.



Get the CPI data from the BLS

Note that even with a bls key, it takes 2 downloads to get all the CPI data. Without the key, you have to run 4 downloads, and can only do so a couple of times before the allowable limit is reached.

df1 <- bls_api("CUSR0000SA0",startyear = 1990, endyear = 2020,registrationKey = "BLS_KEY")
df2 <- bls_api("CUSR0000SA0",startyear = 2010, endyear = 2020,registrationKey = "BLS_KEY")
df2 <- subset(df2,select = -c(latest))

infl <- rbind(df1,df2)
infl <- subset(infl,select = -c(footnotes,seriesID))
infl$period <- substr(infl$period,2,3)
infl$date <- paste(infl$period,"/","1","/",infl$year)
infl$date <- gsub(" ","",infl$date)
infl$date2 <- as.Date(infl$date,"%m/%d/%Y") # the key is the capital 'Y'
infl$deflate <- (259.050/infl$value)        # inflation in February 
head(infl)
## # A tibble: 6 x 7
##    year period periodName value date      date2      deflate
##   <dbl> <chr>  <chr>      <dbl> <chr>     <date>       <dbl>
## 1  2009 12     December    217. 12/1/2009 2009-12-01    1.19
## 2  2009 11     November    217. 11/1/2009 2009-11-01    1.19
## 3  2009 10     October     217. 10/1/2009 2009-10-01    1.20
## 4  2009 09     September   216. 09/1/2009 2009-09-01    1.20
## 5  2009 08     August      215. 08/1/2009 2009-08-01    1.20
## 6  2009 07     July        215. 07/1/2009 2009-07-01    1.21


Join the data


txadj <- left_join(txall,infl,by=c("date" = "date2"))


Adjust for inflation


This version makes all values equivalent to February 2020 dollars. This is a bit of a pain because it means the standard changes every month, and the formula for the deflator (in the above code chunk, line 119) has to be updated and hard-coded every time. The up side to this approach is that the numbers are in the value that the average person will understand. That understanding may not mean much since the data is converted further down to percent change, so no dollar values actually appear in this version of the analysis, at this time.

txadj$real <- (txadj$deflate * txadj$lead) # real = inflation-adjusted "lead" revs

Percent change

Calculate the percent change from the same month in the previous year. Note the 12 in line 157 - this stipulates the number of time units back to which the lag applies.

The basic approach is to generate a new column of the data of interest lagged by the specified number of time periods. And then apply the math to the contemporaneous difference between the two columns.

tr4 <- txadj %>%
  group_by(city) %>%
  mutate(lag = lag(real,12)) %>%
  mutate(pct.change = (real - lag)/lag*100)       # pct.change = 12-month change in real 

48 month moving average to calculate the last 4 years’ average

At this point, I am really just interested in the average change for the last 4 years. This is a little of a complicated way to get there, but I took a rolling mean (moving average) of the last 48 months, and then just keep the latest date so that the cross section has the 4-year average for each city.

tr4$d.ave <- ave(tr4$pct.change, tr4$city, 
                  FUN= function(x) rollmean(x, k=48,align="right", na.pad=T))

Get just the latest month data to have 1 obs with the 4-yr average per city

tr5 <- subset(tr4, year==2020)
tr5 <- subset(tr5, period=="02")

Look at just the top cities

topcities <- subset(tr5, amt>499000)
summary(topcities)
##       date                 amt               city                lead         
##  Min.   :2020-02-01   Min.   :  501507   Length:181         Min.   :  224712  
##  1st Qu.:2020-02-01   1st Qu.:  723177   Class :character   1st Qu.:  498445  
##  Median :2020-02-01   Median : 1327603   Mode  :character   Median :  907809  
##  Mean   :2020-02-01   Mean   : 3305265                      Mean   : 2190176  
##  3rd Qu.:2020-02-01   3rd Qu.: 3214650                      3rd Qu.: 2104128  
##  Max.   :2020-02-01   Max.   :74024289                      Max.   :50402407  
##       year         period           periodName            value      
##  Min.   :2020   Length:181         Length:181         Min.   :259.1  
##  1st Qu.:2020   Class :character   Class :character   1st Qu.:259.1  
##  Median :2020   Mode  :character   Mode  :character   Median :259.1  
##  Mean   :2020                                         Mean   :259.1  
##  3rd Qu.:2020                                         3rd Qu.:259.1  
##  Max.   :2020                                         Max.   :259.1  
##     date.y             deflate       real               lag          
##  Length:181         Min.   :1   Min.   :  224712   Min.   :  227811  
##  Class :character   1st Qu.:1   1st Qu.:  498445   1st Qu.:  514566  
##  Mode  :character   Median :1   Median :  907809   Median :  943005  
##                     Mean   :1   Mean   : 2190176   Mean   : 2273861  
##                     3rd Qu.:1   3rd Qu.: 2104128   3rd Qu.: 2162834  
##                     Max.   :1   Max.   :50402407   Max.   :51948764  
##    pct.change          d.ave       
##  Min.   :-33.993   Min.   :-5.805  
##  1st Qu.: -8.688   1st Qu.: 1.665  
##  Median : -3.803   Median : 3.094  
##  Mean   : -2.621   Mean   :   Inf  
##  3rd Qu.:  1.850   3rd Qu.: 5.354  
##  Max.   : 47.980   Max.   :   Inf