<?xml version="1.0" encoding="UTF-8"?>
<!-- Encoding is declared explicitly because the feed text contains non-ASCII characters; UTF-8 is also what a parser assumes when no encoding is declared. -->
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://wiki.extremist.software/index.php?action=history&amp;feed=atom&amp;title=Machine_Learning%2Fbrief_statistics_r</id>
	<title>Machine Learning/brief statistics r - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://wiki.extremist.software/index.php?action=history&amp;feed=atom&amp;title=Machine_Learning%2Fbrief_statistics_r"/>
	<link rel="alternate" type="text/html" href="https://wiki.extremist.software/index.php?title=Machine_Learning/brief_statistics_r&amp;action=history"/>
	<updated>2026-04-04T12:39:06Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.39.13</generator>
	<entry>
		<id>https://wiki.extremist.software/index.php?title=Machine_Learning/brief_statistics_r&amp;diff=11063&amp;oldid=prev</id>
		<title>ThomasLotze: Created page with &#039;==R code from A Brief Tour of Statistics== &lt;pre&gt; b_prob = c(0.05,0.5,0.35,0.1) b_names = c(&quot;swish&quot;,&quot;basket&quot;,&quot;brick&quot;,&quot;air ball&quot;) b_colors = c(2,2,1,1) b_in_prob = c(0.55,0.45) b_i…&#039;</title>
		<link rel="alternate" type="text/html" href="https://wiki.extremist.software/index.php?title=Machine_Learning/brief_statistics_r&amp;diff=11063&amp;oldid=prev"/>
		<updated>2010-05-06T07:08:16Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;#039;==R code from A Brief Tour of Statistics== &amp;lt;pre&amp;gt; b_prob = c(0.05,0.5,0.35,0.1) b_names = c(&amp;quot;swish&amp;quot;,&amp;quot;basket&amp;quot;,&amp;quot;brick&amp;quot;,&amp;quot;air ball&amp;quot;) b_colors = c(2,2,1,1) b_in_prob = c(0.55,0.45) b_i…&amp;#039;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;==R code from A Brief Tour of Statistics==&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
b_prob = c(0.05,0.5,0.35,0.1)&lt;br /&gt;
b_names = c(&amp;quot;swish&amp;quot;,&amp;quot;basket&amp;quot;,&amp;quot;brick&amp;quot;,&amp;quot;air ball&amp;quot;)&lt;br /&gt;
b_colors = c(2,2,1,1)&lt;br /&gt;
b_in_prob = c(0.55,0.45)&lt;br /&gt;
b_in_names = c(&amp;quot;in&amp;quot;,&amp;quot;out&amp;quot;)&lt;br /&gt;
b_in_colors = c(2,1)&lt;br /&gt;
&lt;br /&gt;
png(&amp;quot;basketball_shots.png&amp;quot;)&lt;br /&gt;
barplot(b_prob,names.arg=b_names,legend.text=TRUE,ylim=c(0,1),col=b_colors,ylab=&amp;quot;Probability&amp;quot;)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
png(&amp;quot;basketball_in.png&amp;quot;)&lt;br /&gt;
barplot(b_in_prob,names.arg=b_in_names,legend.text=TRUE,ylim=c(0,1),col=b_in_colors,ylab=&amp;quot;Probability&amp;quot;)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
apple_lambda = 1181.25/(8*30)  # based on http://answers.yahoo.com/question/index?qid=20090205070515AAWn8PL 7.5 bushels * 45 lbs./bushel * 3.5 apples/lb.&lt;br /&gt;
				# growing season around 8 months&lt;br /&gt;
apple_range = (qpois(0.001,lambda=apple_lambda):qpois(0.999,lambda=apple_lambda))&lt;br /&gt;
apple_prob = dpois(apple_range,lambda=apple_lambda)&lt;br /&gt;
&lt;br /&gt;
png(&amp;quot;apple_distribution.png&amp;quot;)&lt;br /&gt;
barplot(apple_prob,names.arg=apple_range,legend.text=TRUE,xlab=&amp;quot;Apples per tree per day&amp;quot;,ylab=&amp;quot;Probability&amp;quot;)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
# Bus waiting times are Poisson&lt;br /&gt;
bus_lambda = 10&lt;br /&gt;
bus_range = (0:qpois(0.999,lambda=bus_lambda))&lt;br /&gt;
bus_prob = dpois(bus_range,lambda=bus_lambda)&lt;br /&gt;
png(&amp;quot;bus_distribution.png&amp;quot;,width=10*300,height=2*300)&lt;br /&gt;
barplot(bus_prob,names.arg=bus_range,legend.text=TRUE,xlab=&amp;quot;Wait time&amp;quot;,ylab=&amp;quot;Probability&amp;quot;)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
# Rainfall is lognormal&lt;br /&gt;
avg_rainfall = 4.4&lt;br /&gt;
rain_amt = seq(0,50,by=0.01)&lt;br /&gt;
rain_prob = dlnorm(rain_amt,meanlog=log(avg_rainfall))&lt;br /&gt;
&lt;br /&gt;
png(&amp;quot;rain_distribution.png&amp;quot;)&lt;br /&gt;
plot(rain_amt,rain_prob,type=&amp;quot;l&amp;quot;,xlab=&amp;quot;Rain (inches)&amp;quot;,ylab=&amp;quot;Probability density&amp;quot;)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
# Lognormal rainfall density with the 10 to 20 inch interval shaded grey.&lt;br /&gt;
png(&amp;quot;rain_distribution_1020.png&amp;quot;)&lt;br /&gt;
plot(rain_amt,rain_prob,type=&amp;quot;l&amp;quot;,xlab=&amp;quot;Rain (inches)&amp;quot;,ylab=&amp;quot;Probability density&amp;quot;)&lt;br /&gt;
# One vectorised segments() call draws every grey vertical line from y = 0 up to the density curve.&lt;br /&gt;
shade_x = seq(10,20,by=0.01)&lt;br /&gt;
segments(shade_x,0,shade_x,dlnorm(shade_x,meanlog=log(avg_rainfall)),col=&amp;quot;grey&amp;quot;)&lt;br /&gt;
# Redraw the curve so it sits on top of the shading.&lt;br /&gt;
points(rain_amt,rain_prob,type=&amp;quot;l&amp;quot;)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
##  Balls in bins&lt;br /&gt;
&lt;br /&gt;
ball_color=c(&amp;quot;yellow&amp;quot;,&amp;quot;green&amp;quot;,&amp;quot;blue&amp;quot;,&amp;quot;red&amp;quot;,&amp;quot;white&amp;quot;)&lt;br /&gt;
ball_label=c(&amp;quot;yellow&amp;quot;,&amp;quot;green&amp;quot;,&amp;quot;blue&amp;quot;,&amp;quot;red&amp;quot;,&amp;quot;child&amp;quot;)&lt;br /&gt;
ball_probability=c(0.32,0.24,0.18,0.16,0.1)&lt;br /&gt;
png(&amp;quot;ball_distribution.png&amp;quot;)&lt;br /&gt;
barplot(ball_probability,names.arg=ball_label,legend.text=TRUE,xlab=&amp;quot;Ball color&amp;quot;,ylab=&amp;quot;Probability&amp;quot;,col=ball_color)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
png(&amp;quot;ball_distribution2.png&amp;quot;)&lt;br /&gt;
barplot(c(0.32,0.68),names.arg=c(&amp;quot;yellow&amp;quot;,&amp;quot;non-yellow&amp;quot;),legend.text=TRUE,xlab=&amp;quot;Ball color&amp;quot;,ylab=&amp;quot;Probability&amp;quot;,col=c(&amp;quot;yellow&amp;quot;,&amp;quot;white&amp;quot;))&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
## Distribution of number of baskets&lt;br /&gt;
n = 15&lt;br /&gt;
basket_range = (qbinom(0.001,size=n,prob=b_in_prob[1]):qbinom(0.999,size=n,prob=b_in_prob[1]))&lt;br /&gt;
basket_prob = dbinom(basket_range,size=n,prob=b_in_prob[1])&lt;br /&gt;
png(sprintf(&amp;quot;basket_distribution_%d.png&amp;quot;,n))&lt;br /&gt;
barplot(basket_prob,names.arg=basket_range,legend.text=TRUE,xlab=&amp;quot;Number of baskets&amp;quot;,ylab=&amp;quot;Probability&amp;quot;,main=sprintf(&amp;quot;%d shots&amp;quot;,n))&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
n = 30&lt;br /&gt;
basket_range = (qbinom(0.001,size=n,prob=b_in_prob[1]):qbinom(0.999,size=n,prob=b_in_prob[1]))&lt;br /&gt;
basket_prob = dbinom(basket_range,size=n,prob=b_in_prob[1])&lt;br /&gt;
png(sprintf(&amp;quot;basket_distribution_%d.png&amp;quot;,n))&lt;br /&gt;
barplot(basket_prob,names.arg=basket_range,legend.text=TRUE,xlab=&amp;quot;Number of baskets&amp;quot;,ylab=&amp;quot;Probability&amp;quot;,main=sprintf(&amp;quot;%d shots&amp;quot;,n))&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
horse_data=c(109,65,22,3,1)&lt;br /&gt;
png(&amp;quot;horse_kick.png&amp;quot;)&lt;br /&gt;
b=barplot(horse_data,names.arg=(0:4),legend.text=TRUE,xlab=&amp;quot;Number of Deaths from Horse Kick&amp;quot;,ylab=&amp;quot;Occurrences&amp;quot;)&lt;br /&gt;
mean_death = sum(horse_data*(0:4))/sum(horse_data)&lt;br /&gt;
death_points = (0:4)&lt;br /&gt;
#points(b,sum(horse_data)*dpois(death_points,lambda=mean_death),col=2,pch=19)&lt;br /&gt;
#points(b,sum(horse_data)*dpois(death_points,lambda=mean_death),type=&amp;quot;l&amp;quot;,col=2)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
#&lt;br /&gt;
png(&amp;quot;uniform.png&amp;quot;)&lt;br /&gt;
barplot(rep(0.2,5),names.arg=(0:4),legend.text=TRUE,ylab=&amp;quot;Probability&amp;quot;,ylim=c(0,1))&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
## Ways of estimating&lt;br /&gt;
&lt;br /&gt;
# distribution of the mean&lt;br /&gt;
&lt;br /&gt;
n = 38&lt;br /&gt;
p = 21/38&lt;br /&gt;
&lt;br /&gt;
basket_range = (qbinom(0.001,size=n,prob=p):qbinom(0.999,size=n,prob=p))&lt;br /&gt;
basket_prob = dbinom(basket_range,size=n,prob=p)&lt;br /&gt;
png(sprintf(&amp;quot;estimate_distribution_%0.2f_%d.png&amp;quot;,p,n))&lt;br /&gt;
barplot(basket_prob,names.arg=sprintf(&amp;quot;%0.2f&amp;quot;,basket_range/n),legend.text=TRUE,xlab=&amp;quot;Estimated Field Goal Rate&amp;quot;,ylab=&amp;quot;Probability&amp;quot;,main=sprintf(&amp;quot;%0.2f Field Goal rate&amp;quot;,p))&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
## Determining basket shooting percentage&lt;br /&gt;
&lt;br /&gt;
n = 38&lt;br /&gt;
p = 0.5&lt;br /&gt;
&lt;br /&gt;
# For each candidate field goal rate p, save a bar chart of the binomial distribution&lt;br /&gt;
# of baskets made in n shots, then print five simulated basket counts.&lt;br /&gt;
for (p in c(0.25,0.5,0.75)) {&lt;br /&gt;
&lt;br /&gt;
# Plot only the 0.001 to 0.999 quantile range so the bars are not dominated by empty tails.&lt;br /&gt;
basket_range = (qbinom(0.001,size=n,prob=p):qbinom(0.999,size=n,prob=p))&lt;br /&gt;
basket_prob = dbinom(basket_range,size=n,prob=p)&lt;br /&gt;
png(sprintf(&amp;quot;field_goal_distribution_%0.2f_%d.png&amp;quot;,p,n))&lt;br /&gt;
barplot(basket_prob,names.arg=basket_range,legend.text=TRUE,xlab=&amp;quot;Number of baskets&amp;quot;,ylab=&amp;quot;Probability&amp;quot;,main=sprintf(&amp;quot;%0.2f field goal rate&amp;quot;,p))&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
# collapse (not sep) joins the five draws into one string, so exactly one line is printed per rate.&lt;br /&gt;
print(sprintf(&amp;quot;%0.2f: %s of %d&amp;quot;,p,paste(rbinom(5,size=n,prob=p),collapse=&amp;quot;, &amp;quot;),n))&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
# 38 shots, make 21&lt;br /&gt;
# Set p to the observed rate; without this p is still 0.75 from the loop above and this block only redraws the 0.75 chart.&lt;br /&gt;
p = 21/38&lt;br /&gt;
basket_range = (qbinom(0.001,size=n,prob=p):qbinom(0.999,size=n,prob=p))&lt;br /&gt;
basket_prob = dbinom(basket_range,size=n,prob=p)&lt;br /&gt;
png(sprintf(&amp;quot;field_goal_distribution_%0.2f_%d.png&amp;quot;,p,n))&lt;br /&gt;
barplot(basket_prob,names.arg=basket_range,legend.text=TRUE,xlab=&amp;quot;Number of baskets&amp;quot;,ylab=&amp;quot;Probability&amp;quot;,main=sprintf(&amp;quot;%0.2f field goal rate&amp;quot;,p))&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
# confidence interval and p-value&lt;br /&gt;
&lt;br /&gt;
for (p in seq(0,1,by=0.01)) {&lt;br /&gt;
	print(sprintf(&amp;quot;%0.2f: P(&amp;gt;=21 baskets) = %0.4f&amp;quot;,p,1-pbinom(20,size=n,prob=p)))&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
# For several candidate field goal rates, plot the binomial distribution over 0 to n baskets&lt;br /&gt;
# and draw the outcomes of 21 or more baskets in colour 2.&lt;br /&gt;
for (p in c(0.30,0.34,0.41,0.44,0.47)) {&lt;br /&gt;
	# Derive the range and the colour vector from n instead of hard-coding 38 and 39, so they stay in step if n changes.&lt;br /&gt;
	basket_range = (0:n)&lt;br /&gt;
	basket_prob = dbinom(basket_range,size=n,prob=p)&lt;br /&gt;
	basket_color = rep(1,length(basket_range))&lt;br /&gt;
	basket_color[basket_range &amp;gt;= 21] = 2&lt;br /&gt;
	png(sprintf(&amp;quot;field_goal_pval_%0.2f_%d.png&amp;quot;,p,n))&lt;br /&gt;
	barplot(basket_prob, names.arg=basket_range, legend.text = TRUE, xlab=&amp;quot;Number of baskets&amp;quot;,ylab=&amp;quot;Probability&amp;quot;,main=sprintf(&amp;quot;%0.2f field goal rate&amp;quot;,p),col=basket_color)&lt;br /&gt;
	dev.off()&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
# CLT&lt;br /&gt;
&lt;br /&gt;
# NOTE(review): p is assigned here but never read in this loop, which uses b_in_prob[1] instead - confirm which rate was intended.&lt;br /&gt;
p = 21/38&lt;br /&gt;
# For each shot count n, save a bar chart of the binomial distribution of baskets made, restricted to the 0.001 to 0.999 quantile range.&lt;br /&gt;
for (n in c(10,100,1000)) {&lt;br /&gt;
basket_range = (qbinom(0.001,size=n,prob=b_in_prob[1]):qbinom(0.999,size=n,prob=b_in_prob[1]))&lt;br /&gt;
basket_prob = dbinom(basket_range,size=n,prob=b_in_prob[1])&lt;br /&gt;
png(sprintf(&amp;quot;basket_distribution_clt_%d.png&amp;quot;,n))&lt;br /&gt;
barplot(basket_prob,names.arg=basket_range,legend.text=TRUE,xlab=&amp;quot;Number of baskets&amp;quot;,ylab=&amp;quot;Probability&amp;quot;,main=sprintf(&amp;quot;%d shots&amp;quot;,n))&lt;br /&gt;
dev.off()&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
# Regression&lt;br /&gt;
&lt;br /&gt;
n = 100000&lt;br /&gt;
&lt;br /&gt;
x = rpois(n,lambda=17)&lt;br /&gt;
y = 32.7 + x*12.3 + rnorm(n, mean=0, sd=50)&lt;br /&gt;
&lt;br /&gt;
my_lm = lm(y~x)&lt;br /&gt;
sorted_x = unique(x[order(x)])&lt;br /&gt;
predicted = predict(my_lm,newdata=data.frame(x=sorted_x))&lt;br /&gt;
&lt;br /&gt;
png(&amp;quot;regression.png&amp;quot;)&lt;br /&gt;
plot(x,y)&lt;br /&gt;
points(sorted_x,predicted,type=&amp;quot;l&amp;quot;,col=2,lwd=3)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
png(&amp;quot;residuals.png&amp;quot;)&lt;br /&gt;
residuals = y - predict(my_lm,newdata=data.frame(x=x))&lt;br /&gt;
plot(x,residuals)&lt;br /&gt;
abline(a=0,b=0,col=2,lwd=3)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
png(&amp;quot;residuals2.png&amp;quot;)&lt;br /&gt;
plot(residuals,x)&lt;br /&gt;
lines(x=c(0,0),y=range(x),col=2,lwd=3)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
png(&amp;quot;residuals_hist.png&amp;quot;)&lt;br /&gt;
hist(residuals)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
# logit&lt;br /&gt;
&lt;br /&gt;
library(&amp;quot;boot&amp;quot;)&lt;br /&gt;
x = (-10:10)&lt;br /&gt;
png(&amp;quot;inv_logit.png&amp;quot;)&lt;br /&gt;
plot(x,inv.logit(x),type=&amp;quot;l&amp;quot;)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
# non-normal&lt;br /&gt;
&lt;br /&gt;
# mean 500, variance 500&lt;br /&gt;
mean = 500&lt;br /&gt;
variance = 500&lt;br /&gt;
k = 100000&lt;br /&gt;
&lt;br /&gt;
png(&amp;quot;normal.png&amp;quot;)&lt;br /&gt;
hist(rnorm(k,mean=mean,sd=sqrt(variance)),xlab=&amp;quot;Value&amp;quot;,ylab=&amp;quot;Frequency&amp;quot;,main=&amp;quot;Normal&amp;quot;)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
# chi-squared&lt;br /&gt;
#png(&amp;quot;chisquared.png&amp;quot;)&lt;br /&gt;
#hist(rchisq(k,df=mean),xlab=&amp;quot;Value&amp;quot;,ylab=&amp;quot;Frequency&amp;quot;,main=&amp;quot;Chi-Squared&amp;quot;)&lt;br /&gt;
#dev.off()&lt;br /&gt;
&lt;br /&gt;
# bimodal&lt;br /&gt;
png(&amp;quot;bimodal.png&amp;quot;)&lt;br /&gt;
diff = sqrt(variance)-2&lt;br /&gt;
bimodal = c(rnorm(k/2,mean=mean-diff,sd=sqrt(variance-diff^2)),rnorm(k/2,mean=mean+diff,sd=sqrt(variance-diff^2)))&lt;br /&gt;
hist(bimodal, xlab=&amp;quot;Value&amp;quot;,ylab=&amp;quot;Frequency&amp;quot;,main=&amp;quot;Bimodal&amp;quot;)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
# Histogram of k uniform draws on an interval centred on mean; an interval of width sqrt(12*variance) has variance equal to variance.&lt;br /&gt;
# NOTE(review): this writes uniform.png, the same file name used by the earlier discrete uniform bar chart, so the later plot replaces the earlier file - confirm whether a distinct name was intended.&lt;br /&gt;
png(&amp;quot;uniform.png&amp;quot;)&lt;br /&gt;
range=sqrt(12*variance)&lt;br /&gt;
hist(runif(k,mean-range/2,mean+range/2),xlab=&amp;quot;Value&amp;quot;,ylab=&amp;quot;Frequency&amp;quot;,main=&amp;quot;Uniform&amp;quot;)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
# not enough black swans&lt;br /&gt;
&lt;br /&gt;
for (n in c(100,1000,10000)) {&lt;br /&gt;
png(sprintf(&amp;quot;hurricane_%d.png&amp;quot;,n))&lt;br /&gt;
barplot(c(0,n),names.arg=c(&amp;quot;Hurricane&amp;quot;,&amp;quot;No Hurricane&amp;quot;),legend.text=TRUE,ylab=&amp;quot;Days&amp;quot;)&lt;br /&gt;
dev.off()&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
#######################################################&lt;br /&gt;
&lt;br /&gt;
sf_monthly_rainfall_mean = c(4.4,3.3,3.1,1.4,0.3,0.1,0.0,0.1,0.3,1.3,2.9,3.1)&lt;br /&gt;
this_month = as.numeric(format(Sys.time(),format=&amp;quot;%m&amp;quot;))&lt;br /&gt;
cur_mean = sf_monthly_rainfall_mean[this_month]&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
png(&amp;quot;heads_tails_fair.png&amp;quot;)&lt;br /&gt;
barplot(c(0.5-1/12000,0.5-1/12000,1/6000),names.arg=c(&amp;quot;heads&amp;quot;,&amp;quot;tails&amp;quot;,&amp;quot;side&amp;quot;),legend.text=TRUE,ylim=c(0,1),ylab=&amp;quot;Probability&amp;quot;)&lt;br /&gt;
dev.off()&lt;br /&gt;
&lt;br /&gt;
png(&amp;quot;heads_tails_biased.png&amp;quot;)&lt;br /&gt;
barplot(c(0.8-1/12000,0.2-1/12000,1/6000),names.arg=c(&amp;quot;heads&amp;quot;,&amp;quot;tails&amp;quot;,&amp;quot;side&amp;quot;),legend.text=TRUE,ylim=c(0,1),ylab=&amp;quot;Probability&amp;quot;)&lt;br /&gt;
dev.off()&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;/div&gt;</summary>
		<author><name>ThomasLotze</name></author>
	</entry>
</feed>