dwt <- c(2566, 3704, 4008, 5984, 7700, 8034, 8229, 7858, 8408, 8939)
year <- c(1970, 1980, 1990, 2000, 2006, 2007, 2008, 2009, 2010, 2011)
library(ggplot2)
dwt <- c(2566, 3704, 4008, 5984, 7700, 8034, 8229, 7858, 8408, 8939)
year <- c(1970, 1980, 1990, 2000, 2006, 2007, 2008, 2009, 2010, 2011)
df1 <- data.frame(year, dwt)
# ggplot
ggplot(df1,aes(year, dwt)) +
geom_point(color = 'red') +
xlab('Year') +
ylab('Millions of dead weight tons') +
ylim(0,10000)
unitCost <- c(700, 200, 150, 50, 15, 3, 1) shipCost <- c(10.00, 1.50, 1.00, 0.15, 0.15, 0.05, 0.01) item <- c('TV set', 'DVD player', 'Vacuum cleaner', 'Scotch whisky', 'Coffee', 'Biscuits', 'Beer')
library(ggplot2)
unitCost <- c(700, 200, 150, 50, 15, 3, 1)
shipCost <- c(10.00, 1.50, 1.00, 0.15, 0.15, 0.05, 0.01)
diff <- shipCost/unitCost*100
item <- c('TV set', 'DVD player', 'Vacuum cleaner', 'Scotch whisky', 'Coffee', 'Biscuits', 'Beer')
df2 <- data.frame(unitCost, shipCost,diff, item)
# ggplot
ggplot(df2,aes(item, diff)) +
geom_col(fill = 'bisque') +
xlab('Item') +
ylab('Relative cost of shipping (%)')
library(ggplot2)
library(DBI)
library(RMySQL)
# connect to the database
conn <- dbConnect(RMySQL::MySQL(),"www.richardtwatson.com", dbname = "ClassicModels", user = "student", password = "student")
# Query the database and create file for use with R
d <- dbGetQuery(conn,"SELECT productScale from Products;")
# Plot the number of product lines by specifying the appropriate column name
# Internal fill color is blue
# ggplot
ggplot(d,aes(productScale)) +
geom_bar(fill = 'blue') +
xlab('Product scale') +
ylab('Frequency')
library(ggplot2)
library(DBI)
library(RMySQL)
# connect to the database
conn <- dbConnect(RMySQL::MySQL(),"www.richardtwatson.com", dbname = "ClassicModels", user = "student", password = "student")
# Query the database and create file for use with R
d <- dbGetQuery(conn,"SELECT MONTH(paymentDate) as month, SUM(amount) AS monthTotal FROM Payments WHERE YEAR(paymentDate)= 2004 GROUP BY MONTH(paymentDate);") # ggplot
ggplot(d,aes(month, monthTotal)) +
geom_line(color = 'green') +
xlab('Month') +
ylab('Total payments')
library(ggplot2)
library(DBI)
library(RMySQL)
# connect to the database
conn <- dbConnect(RMySQL::MySQL(),"www.richardtwatson.com", dbname = "ClassicModels", user = "student", password = "student")
# Query the database and create file for use with R
d <- dbGetQuery(conn,"SELECT country, sum(quantityOrdered*priceEach) AS orders FROM Orders, OrderDetails, Customers WHERE Orders.orderNumber = OrderDetails.orderNumber and Customers.customerNumber = Orders.customerNumber AND country IN ('Denmark','Finland', 'Norway','Sweden') GROUP BY countr
# ggplot
ggplot(d,aes(country, orders)) +
geom_col(fill = 'yellow') +
xlab('Country') +
ylab('Orders')
library(ggplot2)
library(DBI)
library(RMySQL)
# connect to the database
conn <- dbConnect(RMySQL::MySQL(),"www.richardtwatson.com", dbname = "ClassicModels", user = "student", password = "student")
# Query the database and create file for use with R
d <- dbGetQuery(conn,"SELECT count(*) as frequency, productLine, city
FROM Products, OrderDetails, Orders, Customers
WHERE Products.productCode = OrderDetails.productCode
AND OrderDetails.orderNumber = Orders.orderNumber
AND Orders.customerNumber = Customers.customerNumber
AND country = 'Norway'
GROUP BY productLine, city")
# ggplot
ggplot(d,aes(productLine, city)) +
geom_tile(aes(fill=frequency)) +
scale_fill_gradient(low=" light blue", high="dark blue") +
xlab('Line') +
ylab('Scale')
library(ggplot2) library(DBI) library(RMySQL) library(ggmap) # connect to the database conn <- dbConnect(RMySQL::MySQL(),"www.richardtwatson.com", dbname = "ClassicModels", user = "student", password = "student") # Query the database and create file for use with R d <- dbGetQuery(conn,"SELECT y(officeLocation) AS lon, x(officeLocation) AS lat FROM Offices;") map <- get_googlemap('tokyo, japan',marker=d,zoom=5) ggmap(map) + labs(x = 'Longitude', y = 'Latitude') + ggtitle('Japanese offices')
library(ggplot2) library(DBI) library(RMySQL) library(ggmap) # connect to the database conn <- dbConnect(RMySQL::MySQL(),"www.richardtwatson.com", dbname = "ClassicModels", user = "student", password = "student") # Query the database and create file for use with R d <- dbGetQuery(conn,"SELECT y(customerLocation) AS lon, x(customerLocation) AS lat FROM Customers WHERE customerNumber NOT IN (SELECT customerNumber FROM Orders);") map <- get_googlemap('europe',marker=d,zoom=4) ggmap(map) + labs(x = 'Longitude', y = 'Latitude') + ggtitle('European customers without orders')
library(ggplot2)
library(readr)
url <- 'http://www.richardtwatson.com/data/manheim.txt'
t <- read_delim(url, delim=',')
# ggplot
ggplot(t,aes(model)) +
geom_bar(fill = 'lightgreen')
library(ggplot2)
library(readr)
url <- 'http://www.richardtwatson.com/data/manheim.txt'
t <- read_delim(url, delim=',')
# ggplot
ggplot(t,aes(sale)) +
geom_bar(fill = 'violet')
library(ggplot2)
library(readr)
# read with Import Dataset
url <- 'http://www.richardtwatson.com/data/electricityprices.csv'
t <- read_delim (url, delim = ',')
# ggplot
ggplot(t,aes(factor(0),cost)) +
geom_boxplot(outlier.colour='red') +
xlab("Hourly measures") +
ylab("Cost")
library(ggplot2) library(readr) url <- 'http://www.richardtwatson.com/data/wealth.csv' t <- read_delim(url, delim=',') # ggplot ggplot(t, aes(`GDP per capita`)) + geom_histogram(fill='firebrick') ggplot(t, aes(`GDP per capita`)) + geom_histogram(fill='darkorange') ggplot(t, aes(`GDP per capita`)) + geom_histogram(fill='darkolivegreen')
Merge the weather and electricity price data for a major city. The merged file should contain air temperature and electricity cost. Also, you need to convert air temperature from a factor to a numeric (hint: first convert it to a character).
library(readr)
library(dplyr)
wurl <- 'http://www.richardtwatson.com/data/weather.csv'
w <- read_delim(wurl,delim = ',')
eurl <- 'http://www.richardtwatson.com/data/electricityprices.csv'
e <- read_delim(eurl,delim = ',')
m <- inner_join(e,w, by = c('timestamp' = 'Timestamp'))
m$airtemp <- as.numeric(as.character(m$Air.Temp..F.))
head(m)
Compute the correlation between temperature and electricity price. What do you conclude?
cor.test(m$airtemp,m$cost)Conclude there is a significant relationship between temperature and cost of electricity (p < .05)
library(ggplot2)
m$airtemp <- as.numeric(m$Air.Temp..F.)
# ggplot
ggplot(m, aes(airtemp, cost)) +
geom_point(color ='FireBrick') +
xlab('Air temperature') +
ylab('Electricity cost')
library(ggplot2)
library(dplyr)
m1 <- m %>% filter(airtemp >= 95)
# ggplot
ggplot(m1,aes(airtemp, cost)) +
geom_point(color ='FireBrick') +
xlab('Air temperature') +
ylab('Electricity cost')
library(ggplot2)
library(dplyr)
m$Cut <- 'Not so hot'
m$Cut[m$airtemp >= 90] <- 'Hot'
# ggplot
ggplot(m,aes(airtemp, cost, group = Cut)) +
geom_point(aes(color = Cut)) +
xlab('Air temperature') +
ylab('Electricity cost')
This page is part of the promotional and support
material for Data Management (open edition) by Richard T. Watson For questions and comments please contact the author |