This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# dplyr has to be attached before the first filter()/select() call. Without it
# `filter` resolves to stats::filter(), which does not understand column
# expressions such as `speed >= 20`.
library(dplyr)

# Scatter plot of the built-in `cars` data set.
plot(cars)

# A data frame is stored as a list of columns, so typeof() reports "list".
typeof(cars)
ncol(cars)
nrow(cars)
cars$speed
cars$dist

# Keep only the rows whose speed is at least 20.
cars.fast <- filter(cars, speed >= 20)
cars.fast
typeof(cars.fast)

# select() keeps the data-frame structure even for a single column.
cars.speed <- select(cars, speed)
typeof(cars.speed)
names(cars.speed)

# Same verbs chained with the pipe: rows with 10 <= speed <= 20, speed column only.
cars %>%
  filter(speed >= 10 & speed <= 20) %>%
  select(speed)


# Column-selection helpers demonstrated on `iris`, written in pipe form.
iris %>% head()
iris %>% select(contains("."))                 # names containing a literal dot
iris %>% select(ends_with("Length"))           # names ending in "Length"
iris %>% select(everything())                  # all columns
iris %>% select(matches(".t."))                # regular expression match
iris %>% select(one_of(c("Species", "Genus"))) # "Genus" is not a column of iris
iris %>% select(starts_with("Sepal"))          # names starting with "Sepal"
iris %>% select(Sepal.Length:Petal.Width)      # contiguous range of columns
iris %>% select(-Species)                      # everything except Species

# Positional extractors applied to whole data frames.
cars %>% first()
cars %>% last()
iris %>% nth(3)


# Sorting: arrange() returns a new data frame, `cars` itself is not modified.
cars
cars.sorted <- cars %>% arrange(desc(dist))
cars.sorted
cars

# Dimensions before and after removing duplicated rows.
ncol(cars)
nrow(cars)
dim(cars)
cars.unikalne <- cars %>% distinct()
dim(cars.unikalne)

# Row subsetting helpers.
top_n(cars, 5)          # no `wt` given, so rows are ranked by the last column
sample_n(cars, 5)       # 5 random rows
sample_frac(cars, 0.2)  # random 20% of the rows
slice(cars, 10:20)      # rows 10 to 20 by position

# mutate() adds a column and keeps the existing ones.
mutate(cars, time = dist / speed)

# transmute() keeps only the newly computed columns.
# `cars` records speed in mph and stopping distance in feet, so:
#   mph  -> km/h : multiply by 1.609344 (the original divided, giving mi/h per km)
#   feet -> km   : multiply by 0.0003048 (1 ft = 0.3048 m)
cars.SI <- transmute(
  cars,
  speed_kmh = speed * 1.609344,
  dist_km = dist * 0.0003048
)
cars.SI

# Grouping alone only attaches grouping metadata; the rows are unchanged.
cars %>% group_by(speed)

# Mean stopping distance per speed for speeds in [10, 15], largest mean first.
# Conditions separated by commas in filter() are combined with AND.
cars %>%
  filter(speed >= 10, speed <= 15) %>%
  group_by(speed) %>%
  summarise(srednia = mean(dist)) %>%
  arrange(desc(srednia))

# Ungrouped summary compared with the base-R equivalent.
cars %>% summarise(sum(dist))
sum(cars$dist)

# Combine `cars` with itself: once stacked by rows, once side by side by columns.
cars2r <- bind_rows(cars, cars)
cars2c <- bind_cols(cars, cars)

# Inspect the column-bound result only after it has been created
# (the original called dim(cars2c) before cars2c existed).
dim(cars2c)
cars2c

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

# Build a 10 x 4 "wide" data frame: an index column `i` followed by three
# standard-normal draws per row.
# The rows are generated in order and bound in a single rbind() call instead of
# growing the data frame inside a loop, which copies the whole object on every
# iteration.
d1 <- as.data.frame(
  do.call(rbind, lapply(seq_len(10), function(i) c(i, rnorm(3))))
)
names(d1) <- c("i", "v1", "v2", "v3")
head(d1)

# Build the same kind of data in "long" form: for every i in 1..10 a small data
# frame with three rows (columns `i` and `values`) is produced, and all pieces
# are bound together in one rbind() call rather than appended one by one.
d2 <- do.call(
  rbind,
  lapply(seq_len(10), function(i) {
    values <- rnorm(3)
    # Column names "i" and "values" are taken from the argument names.
    data.frame(i, values)
  })
)
head(d2)
tail(d2)

# Wide -> long with base reshape(): columns v1..v3 are stacked into a single
# `values` column and `name` records which column each value came from.
d1.0 <- reshape(
  data = d1,
  direction = "long",
  idvar = "i",
  varying = c("v1", "v2", "v3"),
  v.names = "values",
  timevar = "name",
  times = c("v1", "v2", "v3")
)
d1.0

# Long -> wide: label every row with the target column name first.
# The three labels are recycled down the rows of the copy.
d2.0 <- d2
d2.0$name <- c("v1", "v2", "v3")
head(d2.0)
d2.1 <- reshape(
  data = d2.0,
  direction = "wide",
  timevar = "name",
  idvar = "i"
)
d2.1

library(tidyr)

# Wide -> long with tidyr: v1, v2 and v3 are gathered into name/value pairs.
d1.a <- d1 %>% pivot_longer(cols = c(v1, v2, v3))
d1.a

# Long -> wide with tidyr: rows are labelled (labels recycled down the rows)
# and then spread into one column per label, keyed by `i`.
d2.a <- d2
d2.a$name <- c("v1", "v2", "v3")
d2.a
d2.b <- d2.a %>%
  pivot_wider(id_cols = i, names_from = name, values_from = values)
d2.b


# Build a 10 x 4 data frame: index `i` plus three standard-normal draws per row.
# All rows are created first and bound once, instead of calling rbind() on a
# growing data frame in every loop iteration.
d3 <- as.data.frame(
  do.call(rbind, lapply(seq_len(10), function(i) c(i, rnorm(3))))
)
names(d3) <- c("i", "v1", "v2", "v3")
head(d3)
tail(d3)
# Base-graphics scatter plot of sepal width against sepal length.
plot(
  x = iris$Sepal.Length,
  y = iris$Sepal.Width,
  main = "Sepal Length-Width",
  xlab = "Sepal Length",
  ylab = "Sepal Width"
)

# The same scatter plot with ggplot2, with species shown by colour and shape.
library(ggplot2)
p <- ggplot(data = iris, mapping = aes(x = Sepal.Length, y = Sepal.Width))
p +
  geom_point(mapping = aes(color = Species, shape = Species)) +
  labs(x = "Sepal Length", y = "Sepal Width", title = "Sepal Length-Width")

# Base-graphics box plot of sepal length per species.
boxplot(Sepal.Length ~ Species, data = iris,
        xlab = "Species", ylab = "Sepal Length", main = "iris boxplot")

# ggplot2 version; the group mean is overlaid as a diamond-shaped point.
# `fun` replaces the deprecated `fun.y` argument of stat_summary().
b <- ggplot(data = iris, aes(x = Species, y = Sepal.Length))
b +
  geom_boxplot(aes(fill = Species)) +
  ylab("Sepal Length") +
  ggtitle("iris boxplot") +
  stat_summary(fun = mean, geom = "point", shape = 5, size = 4)

# Histogram of sepal width: base graphics first, then ggplot2 filled by species.
hist(iris$Sepal.Width, breaks = 12,
     xlab = "Sepal Width", ylab = "Frequency", main = "Histogram")
h <- ggplot(data = iris, aes(x = Sepal.Width))
h +
  geom_histogram(binwidth = 0.2, color = "black", aes(fill = Species)) +
  xlab("Sepal Width") +
  ylab("Frequency") +
  ggtitle("Histogram")

# Kernel density estimate per species. The y axis of geom_density() is a
# density, not a count, so the labels copied from the histogram are corrected.
# `alpha` is a plain constant here; the I() wrapper was not needed.
dh <- ggplot(data = iris, aes(x = Sepal.Width, fill = Species))
dh +
  geom_density(alpha = 0.2) +
  xlab("Sepal Width") +
  ylab("Density") +
  ggtitle("Density plot")

# Random subset of 110 iris rows, so the species counts are no longer equal.
iris1 <- iris[sample(seq_len(nrow(iris)), 110), ]

# Hand-made per-species reference values drawn as error bars below.
iris1.df <- data.frame(
  Species = c("setosa", "versicolor", "virginica"),
  max = c(50, 50, 50),
  min = c(20, 25, 30),
  v = c(30, 40, 45)
)
iris1.df

# Species counts as a base bar plot, then as a ggplot2 bar chart with the
# reference ranges overlaid as dashed red error bars.
barplot(table(iris1$Species), col = "black",
        xlab = "Species", ylab = "Count", main = "Bar plot")
b1 <- ggplot(data = iris1, mapping = aes(x = Species))
b1 +
  geom_bar() +
  labs(x = "Species", y = "Count", title = "Bar plot") +
  geom_errorbar(
    data = iris1.df,
    mapping = aes(y = v, ymin = min, ymax = max),
    col = "red",
    linetype = "dashed"
  )


# Number of sampled flowers per species, in factor-level order.
species.counts <- table(iris1$Species)
q <- as.vector(species.counts)
q

# Label data: one row per species. The species names are taken from the table
# itself so they always match the levels used by the bars (the hand-typed
# "sentosa" did not match "setosa").
q.df <- data.frame(
  Species = factor(names(species.counts), levels = levels(iris1$Species)),
  quantity = q
)
q.df

# Single stacked bar with the count printed in the middle of each segment.
# position_stack(vjust = 0.5) stacks the labels the same way the bars are
# stacked and centres them; the former `cumsum(q) + q/2` placed every label
# above the top edge of its segment.
p1 <- ggplot(iris1, aes(x = factor(1), fill = Species)) +
  geom_bar(width = 1) +
  geom_text(
    data = q.df,
    aes(x = factor(1), y = quantity, label = quantity, group = Species),
    position = position_stack(vjust = 0.5),
    size = 5
  ) +
  labs(x = "", y = "")
p1

# Mapping y onto the angle turns the stacked bar into a pie chart.
p1 + coord_polar(theta = "y")

# PART NOT COVERED IN THE VIDEO
# Line plot
# install.packages("plyr")
# plyr is called through its namespace rather than attached with library():
# attaching plyr after dplyr masks dplyr verbs such as summarise(), mutate()
# and arrange(), which breaks the dplyr chunks if they are run again.
sepal.min <- plyr::ddply(iris, "Species", plyr::summarise,
                         xval = min(Sepal.Length), yval = min(Sepal.Width))
sepal.max <- plyr::ddply(iris, "Species", plyr::summarise,
                         xval = max(Sepal.Length), yval = max(Sepal.Width))

# Two points per species: (min length, min width) and (max length, max width).
sepal <- rbind(sepal.min, sepal.max)
sepal

# One line per species connecting its two points, with manually chosen point
# shapes and line types.
ggplot(sepal, aes(x = xval, y = yval, group = Species, color = Species)) +
  geom_line(aes(linetype = Species), size = 1.2) +
  geom_point(aes(shape = Species), size = 4) +
  scale_shape_manual(values = c(6, 5, 4)) +
  scale_linetype_manual(values = c("dotdash", "solid", "dotted")) +
  xlab("Sepal Length") + ylab("Sepal Width") +
  ggtitle("Line plot of sepal length and width")

# Base scatter plot reused below with three different smoothers.
smooth <- ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_point(mapping = aes(shape = Species), size = 1.5) +
  labs(
    x = "Sepal Length",
    y = "Sepal Width",
    title = "Scatterplot with smoothers"
  )

# Linear model
smooth + geom_smooth(method = "lm")
# Local polynomial regression
smooth + geom_smooth(method = "loess")
# Generalised additive model
smooth + geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs"))

# Splitting a plot into sub-plots (facets)
facet <- ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_point(mapping = aes(shape = Species), size = 1.5) +
  geom_smooth(method = "lm") +
  labs(x = "Sepal Length", y = "Sepal Width", title = "Faceting")

# One panel per species, laid out horizontally
facet + facet_grid(. ~ Species)
# One panel per species, laid out vertically
facet + facet_grid(Species ~ .)

# "Volcano" plot: the density of sepal length is mirrored around zero and drawn
# as a ribbon, one panel per species, with the axes flipped.
vol <- ggplot(data = iris, mapping = aes(x = Sepal.Length))
vol +
  stat_density(
    mapping = aes(
      ymax = ..density..,
      ymin = -..density..,
      fill = Species,
      color = Species
    ),
    geom = "ribbon",
    position = "identity"
  ) +
  facet_grid(. ~ Species) +
  coord_flip() +
  xlab("Sepal Length")

# Rug plot: scatter plot with marginal tick marks for each observation.
ggplot(data = iris, mapping = aes(x = Sepal.Length, y = Petal.Length)) +
  geom_point() +
  geom_rug(col = "steelblue", alpha = 0.1) +
  labs(x = "Sepal Length", y = "Petal Length")

# Heat map of the correlation matrix
library(reshape2)

# The four numeric measurement columns of iris.
dat <- iris[, 1:4]

# Correlation matrix melted into long form (Var1, Var2, value).
# The result gets its own name instead of `cor`, so it no longer shadows
# stats::cor(), and `use` is spelled out rather than partially matched from "p".
cor.long <- melt(cor(dat, use = "pairwise.complete.obs"))
head(cor.long)

# One tile per variable pair, filled on a diverging scale fixed to [-1, 1].
heat <- ggplot(data = cor.long, aes(x = Var1, y = Var2, fill = value))
heat + geom_tile() + labs(x = "", y = "") + scale_fill_gradient2(limits = c(-1, 1))

# Saving a plot to a file
plot <- ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(shape = Species, color = Species))

# The output directory is passed to ggsave() explicitly instead of changing
# the working directory with setwd(), so no global state is modified.
out.dir <- "d:\\TMP"

# With no `plot` argument ggsave() writes the most recently displayed plot.
ggsave("plot1.png", path = out.dir)

# File name first, plot passed by name. The former `ggsave(plot, file = ...)`
# only worked because `file` partially matched the `filename` argument.
ggsave("plot2.png", plot = plot, path = out.dir)
ggsave("plot3.png", plot = plot, path = out.dir, width = 6, height = 4)

# Bubble chart - data read from a file on the web (crime rates by US state)
crime <- read.csv("http://datasets.flowingdata.com/crimeRatesByState2005.tsv",
                  header = TRUE, sep = "\t")

# Bubble area is proportional to population; each bubble is labelled with the
# state name. `guide` is an argument of the scale, not of ggplot(): passed to
# ggplot() it was silently ignored, so it now sits on scale_size_area() where
# it actually hides the size legend.
# Points outside the axis limits are dropped by the scales.
ggplot(data = crime,
       aes(x = murder, y = burglary, size = population, label = state)) +
  geom_point(color = "white", fill = "red", shape = 21) +
  scale_size_area(max_size = 15, guide = "none") +
  scale_x_continuous(name = "Murders per 1,000 population", limits = c(0, 12)) +
  scale_y_continuous(name = "Burglaries per 1,000 population", limits = c(0, 1250)) +
  geom_text(size = 2.5) +
  theme_bw()
# All stri_*() functions below come from the stringi package, which was never
# attached in this notebook.
library(stringi)

# Sample strings: an empty one, a sentence, a copy of it and its first 11 characters.
s0 <- ""
s <- "Uniwersytet Mikołaja KOpernika w Toruniu"
s1 <- s
s2 <- substring(s, 1, 11)

# Emptiness and length: stri_length()/nchar() count characters, while length()
# counts vector elements (1 for a single string, even an empty one).
stri_isempty(s0)
stri_length(s); length(s);  nchar(s)
stri_length(s0); length(s0); nchar(s0)

# Equality of strings.
s == s1
s1 == s0

# Locale currently used by stringi.
stri_locale_get()

# Ordering comparisons with the base-R operators.
s > s1
s > s0
s1 > s2

# stri_cmp() returns -1, 0 or 1; stri_sort() orders a character vector.
stri_cmp(s2, s)
c(s1,s2,s,s0)
stri_sort(c(s1,s2,s,s0))

# Concatenation with a separator.
stri_paste("Uniwersytet", "Mikołaja", "Kopernika", sep = " ")

# Repetition and trimming of surrounding white space.
stri_dup("UMK", 3)
stri_trim("\tUMK ")

# Padding on both sides up to a width of 10.
stri_pad("UMK", width=10, side="both")

# Upper-casing with base R and with stringi.
toupper("Mikołaj"); stri_trans_toupper("Mikołaj")
# Character-by-character translation: each character of the second argument is
# replaced by the character at the same position in the third.
stri_trans_char("uniwersytet mikołaja kopernika", "umk", "UMK")
stri_trans_char("załóźć gęślą jaźń", "żółćęśążźń", "zolcesazzn")

# Substring extraction by position, then in-place replacement of the last
# 9 characters of `s`.
stri_sub(s, from = 13, to = 19)
stri_sub(s, -9) <- "w Bydgoszczy"
s

# Split into words and count occurrences of a fixed pattern.
stri_extract_all_words(s)

stri_count_fixed(s, "aj")

# Base-R suffix test: does `s` end with `s2`?
endsWith(s, s2)
