# Select the second column, named day2, from li_df: second
# (li_df is expected to exist already; it is not defined in this snippet)
second <- li_df$day2

# Build a logical vector, TRUE if value in second is extreme: extremes
extremes <- second > 25 | second < 5

# Count the number of TRUEs in extremes
sum(extremes)
# Variables related to your last day of recordings
medium <- "LinkedIn"
num_views <- 14

# Examine the if statement for medium
if (medium == "LinkedIn") {
  print("Showing LinkedIn information")
}
1
## [1] "Showing LinkedIn information"
# Write the if statement for num_views
# (num_views comes from the variables set earlier in this exercise)
if (num_views > 15) {
  print("You're popular!")
}
Add an else
# Variables related to your last day of recordings
medium <- "LinkedIn"
num_views <- 14

# Control structure for medium
if (medium == "LinkedIn") {
  print("Showing LinkedIn information")
} else {
  print("Unknown medium")
}
1
## [1] "Showing LinkedIn information"
# Control structure for num_views
# (num_views comes from the variables set earlier in this exercise)
if (num_views > 15) {
  print("You're popular!")
} else {
  print("Try to be more visible!")
}
1
## [1] "Try to be more visible!"
Customize further: else if
# Variables related to your last day of recordings
medium <- "LinkedIn"
num_views <- 14

# Control structure for medium
if (medium == "LinkedIn") {
  print("Showing LinkedIn information")
} else if (medium == "Facebook") {
  print("Showing Facebook information")
} else {
  print("Unknown medium")
}
1
## [1] "Showing LinkedIn information"
# Control structure for num_views
# (num_views comes from the variables set earlier in this exercise)
# The middle branch must require BOTH bounds (10 < num_views <= 15). Joined
# with `|` it is always TRUE once the first test fails, which makes the final
# else branch unreachable.
if (num_views > 15) {
  print("You're popular!")
} else if (num_views > 10 && num_views <= 15) {
  print("Your number of views is average")
} else {
  print("Try to be more visible!")
}
1
## [1] "Your number of views is average"
Take control!
# Variables related to your last day of recordings
li <- 15
fb <- 9

# Code the control-flow construct:
#   both >= 15 -> double the total; both < 10 -> half the total;
#   otherwise  -> plain total.
# `&&` is the scalar, short-circuiting operator intended for if() conditions.
if (li >= 15 && fb >= 15) {
  sms <- 2 * (li + fb)
} else if (li < 10 && fb < 10) {
  sms <- (li + fb) / 2
} else {
  sms <- li + fb
}

# Print the resulting sms to the console
sms
# Initialize the speed variable
speed <- 64

# Extend/adapt the while loop: brake hard above 48, gently otherwise.
# paste() already inserts a separating space, so the label carries no
# trailing space of its own.
while (speed > 30) {
  print(paste("Your speed is", speed))
  if (speed > 48) {
    print("Slow down big time!")
    speed <- speed - 11
  } else {
    print("Slow down!")
    speed <- speed - 6
  }
}
12345678
## [1] "Your speed is 64"
## [1] "Slow down big time!"
## [1] "Your speed is 53"
## [1] "Slow down big time!"
## [1] "Your speed is 42"
## [1] "Slow down!"
## [1] "Your speed is 36"
## [1] "Slow down!"
Stop the while loop: break
# Initialize the speed variable
speed <- 88

while (speed > 30) {
  print(paste("Your speed is", speed))

  # Break the while loop when speed exceeds 80
  if (speed > 80) {
    break
  } else if (speed > 48) {
    print("Slow down big time!")
    speed <- speed - 11
  } else {
    print("Slow down!")
    speed <- speed - 6
  }
}
1
## [1] "Your speed is 88"
Build a while loop from scratch
strsplit; split up in a vector that contains separate letters.
# Initialize i
i <- 1

# Code the while loop: print triples of 1..10, stopping early at the first
# triple that is a multiple of 8
while (i <= 10) {
  print(i * 3)
  if ((i * 3) %% 8 == 0) {
    break
  }
  i <- i + 1
}
# The nyc list is already specified
nyc <- list(
  pop = 8405837,
  boroughs = c("Manhattan", "Bronx", "Brooklyn", "Queens", "Staten Island"),
  capital = FALSE
)

# Loop version 1: iterate over the list elements directly
for (item in nyc) {
  print(item)
}
# The tic-tac-toe matrix has already been defined for you
# (matrix() fills column by column)
ttt <- matrix(
  c("O", NA, "X", NA, "O", NA, "X", "O", "X"),
  nrow = 3, ncol = 3
)

# Define the double for loop: visit every cell row by row.
# seq_len() is safe even for a zero-row/zero-column matrix, unlike 1:n.
for (i in seq_len(nrow(ttt))) {
  for (j in seq_len(ncol(ttt))) {
    print(paste("On row", i, "and column", j, "the board contains", ttt[i, j]))
  }
}
123456789
## [1] "On row 1 and column 1 the board contains O"
## [1] "On row 1 and column 2 the board contains NA"
## [1] "On row 1 and column 3 the board contains X"
## [1] "On row 2 and column 1 the board contains NA"
## [1] "On row 2 and column 2 the board contains O"
## [1] "On row 2 and column 3 the board contains O"
## [1] "On row 3 and column 1 the board contains X"
## [1] "On row 3 and column 2 the board contains NA"
## [1] "On row 3 and column 3 the board contains X"
# The linkedin vector
linkedin <- c(16, 9, 13, 5, 2, 17, 14)

# Code the for loop with conditionals
# (seq_along() avoids the 1:0 trap of 1:length(x) on an empty vector)
for (i in seq_along(linkedin)) {
  if (linkedin[i] > 10) {
    print("You're popular!")
  } else {
    print("Be more visible!")
  }
  print(linkedin[i])
}
# The linkedin vector
linkedin <- c(16, 9, 13, 5, 2, 17, 14)

# Extend the for loop
for (li in linkedin) {
  if (li > 10) {
    print("You're popular!")
  } else {
    print("Be more visible!")
  }

  # Add code to conditionally break iteration
  if (li > 16) {
    print("This is ridiculous, I'm outta here!")
    break
  }

  # Add code to conditionally skip iteration
  if (li < 5) {
    print("This is too embarrassing!")
    next
  }

  print(li)
}
1 2 3 4 5 6 7 8 9101112
## [1] "You're popular!"
## [1] 16
## [1] "Be more visible!"
## [1] 9
## [1] "You're popular!"
## [1] 13
## [1] "Be more visible!"
## [1] 5
## [1] "Be more visible!"
## [1] "This is too embarrassing!"
## [1] "You're popular!"
## [1] "This is ridiculous, I'm outta here!"
Build a for loop from scratch
# Pre-defined variables
rquote <- "R's internals are irrefutably intriguing"
chars <- strsplit(rquote, split = "")[[1]]
rcount <- 0

# Your solution here: count "r"/"R" characters that occur before the first "u"
for (i in seq_along(chars)) {
  if (chars[i] == "u") {
    break
  }
  if (chars[i] == "r" || chars[i] == "R") {
    rcount <- rcount + 1
  }
}

# Print the resulting rcount variable to the console
print(rcount)
# The linkedin and facebook vectors
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)

# Calculate average number of views
avg_li <- mean(linkedin)
avg_fb <- mean(facebook)

# Inspect avg_li and avg_fb
print(avg_li)
1
## [1] 10.85714
print(avg_fb)
1
## [1] 11.42857
avg_li
1
## [1] 10.85714
# Calculate the mean of linkedin minus facebook
# (both vectors are the ones defined earlier in this exercise)
print(mean(linkedin - facebook))
1
## [1] -0.5714286
Use a function (2)
# The linkedin and facebook vectors
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)

# Calculate the mean of the sum
avg_sum <- mean(linkedin + facebook)

# Calculate the trimmed mean of the sum
# (trim = 0.2 drops 20% of the observations from each end before averaging)
avg_sum_trimmed <- mean(linkedin + facebook, trim = 0.2)

# Inspect both new variables
avg_sum
1
## [1] 22.28571
avg_sum_trimmed
1
## [1] 22.6
Use a function (3)
# The linkedin and facebook vectors (each contains one missing value)
linkedin <- c(16, 9, 13, 5, NA, 17, 14)
facebook <- c(17, NA, 5, 16, 8, 13, 14)

# Basic average of linkedin: NA propagates, so the result is NA
print(mean(linkedin))
1
## [1] NA
# Advanced average of facebook: drop missing values before averaging
# (facebook is the vector defined earlier in this exercise)
print(mean(facebook, na.rm = TRUE))
# The linkedin and facebook vectors
linkedin <- c(16, 9, 13, 5, NA, 17, 14)
facebook <- c(17, NA, 5, 16, 8, 13, 14)

# Calculate the mean absolute deviation, ignoring days where either is missing
mean(abs(linkedin - facebook), na.rm = TRUE)
# Create a function pow_two(): returns its argument squared
pow_two <- function(arg1) {
  arg1^2
}

# Use the function
pow_two(12)
1
## [1] 144
# Create a function sum_abs(): sum of the absolute values of its two arguments
sum_abs <- function(arg2, arg3) {
  abs(arg2) + abs(arg3)
}

# Use the function
sum_abs(-2, 3)
1
## [1] 5
Write your own function (2)
# Define the function hello(): prints a greeting and returns TRUE
hello <- function() {
  print("Hi there!")
  return(TRUE)
}

# Call the function hello()
hello()
123
## [1] "Hi there!"
## [1] TRUE
# Define the function my_filter(): returns arg1 when it is positive,
# NULL otherwise
my_filter <- function(arg1) {
  if (arg1 > 0) {
    return(arg1)
  } else {
    return(NULL)
  }
}

# Call the function my_filter() twice
my_filter(5)
1
## [1] 5
my_filter(-5)
1
## NULL
Write your own function (3)
Variables inside a function are not in the Global Environment.
# Extend the pow_two() function
#   x          : number to square
#   print_info : when TRUE (default), also print a sentence with the result
# Returns x squared. y exists only inside the function, not in the global
# environment.
pow_two <- function(x, print_info = TRUE) {
  y <- x^2
  if (print_info) {
    print(paste(x, "to the power two equals", y))
  }
  return(y)
}

# pow_two(2)
pow_two(2, FALSE)
# The linkedin and facebook vectors
linkedin <- c(16, 9, 13, 5, NA, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)

# Define the interpret function: prints a verdict for a single view count and
# returns the count when it exceeds 15, 0 otherwise
interpret <- function(arg) {
  if (arg > 15) {
    print("You're popular!")
    return(arg)
  } else {
    print("Try to be more visible!")
    return(0)
  }
}

interpret(linkedin[1])
123
## [1] "You're popular!"
## [1] 16
interpret(facebook[2])
123
## [1] "Try to be more visible!"
## [1] 0
R you functional? (2)
# The linkedin and facebook vectors
linkedin <- c(16, 9, 13, 5, 2, 17, 14)
facebook <- c(17, 7, 5, 16, 8, 13, 14)

# The interpret() can be used inside interpret_all()
# Prints a verdict for one view count; returns the count if > 15, else 0.
interpret <- function(num_views) {
  if (num_views > 15) {
    print("You're popular!")
    return(num_views)
  } else {
    print("Try to be more visible!")
    return(0)
  }
}

# Define the interpret_all() function
#   data : vector of view counts, each passed to interpret()
#   logi : when TRUE (default) return the sum of the "popular" counts,
#          otherwise return NULL
interpret_all <- function(data, logi = TRUE) {
  yy <- 0
  for (i in data) {
    yy <- yy + interpret(i)
  }
  if (logi) {
    return(yy)
  } else {
    return(NULL)
  }
}

# Call the interpret_all() function on both linkedin and facebook
interpret_all(linkedin)
123456789
## [1] "You're popular!"
## [1] "Try to be more visible!"
## [1] "Try to be more visible!"
## [1] "Try to be more visible!"
## [1] "Try to be more visible!"
## [1] "You're popular!"
## [1] "Try to be more visible!"
## [1] 33
interpret_all(facebook)
123456789
## [1] "You're popular!"
## [1] "Try to be more visible!"
## [1] "Try to be more visible!"
## [1] "You're popular!"
## [1] "Try to be more visible!"
## [1] "Try to be more visible!"
## [1] "Try to be more visible!"
## [1] 33
# The vector pioneers
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")

# Split names from birth year: split_math
split_math <- strsplit(pioneers, ":")

# Convert to lowercase strings: split_low
split_low <- lapply(split_math, tolower)

# Take a look at the structure of split_low
str(split_low)
# Code from previous exercise
# (the intermediate is called split_math so it does not mask base::split)
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")
split_math <- strsplit(pioneers, split = ":")
split_low <- lapply(split_math, tolower)

# Write function select_first(): returns the first element of a vector
select_first <- function(x) {
  return(x[1])
}

# Apply select_first() over split_low: names
names <- lapply(split_low, select_first)
print(names)
# Write function select_second(): returns the second element of a vector
select_second <- function(x) {
  return(x[2])
}

# Apply select_second() over split_low: years
# (split_low comes from the previous snippet)
years <- lapply(split_low, select_second)
print(years)
# Definition of split_low
# (the intermediate is called split_math so it does not mask base::split)
pioneers <- c("GAUSS:1777", "BAYES:1702", "PASCAL:1623", "PEARSON:1857")
split_math <- strsplit(pioneers, split = ":")
split_low <- lapply(split_math, tolower)

# Replace the select_*() functions by a single function: select_el
# Returns element i of vector x.
select_el <- function(x, i) {
  x[i]
}
# select_second <- function(x) {
#   x[2]
# }

# Call the select_el() function twice on split_low: names and years
# (extra arguments to lapply() are forwarded to select_el)
names <- lapply(split_low, select_el, i = 1)
years <- lapply(split_low, select_el, i = 2)
# Temperature readings: one vector of five measurements per day
temp1 <- c(3, 7, 9, 6, -1)
temp2 <- c(6, 9, 12, 13, 5)
temp3 <- c(4, 8, 3, -1, -3)
temp4 <- c(1, 4, 7, 2, -2)
temp5 <- c(5, 7, 9, 4, 2)
temp6 <- c(-3, 5, 8, 9, 4)
temp7 <- c(3, 6, 9, 4, 1)
temp <- list(temp1, temp2, temp3, temp4, temp5, temp6, temp7)

# Use lapply() to find each day's minimum temperature
lapply(temp, min)
# Use sapply() to find each day's maximum temperature
# (temp is the list of daily temperature vectors defined earlier)
sapply(temp, max)
1
## [1] 9 13 8 7 9 9 9
sapply with your own function
# temp is already defined in the workspace

# Define a function that calculates the average of the min and max of a
# vector: extremes_avg
extremes_avg <- function(x) {
  return((min(x) + max(x)) / 2)
}

# Apply extremes_avg() over temp using sapply()
sapply(temp, extremes_avg)
1
## [1] 4.0 9.0 2.5 2.5 5.5 3.0 5.0
# Apply extremes_avg() over temp using lapply()
# (temp and extremes_avg() come from the previous snippets)
lapply(temp, extremes_avg)
# temp is already available in the workspace

# Create a function that returns min and max of a vector: extremes
extremes <- function(x) {
  c(min(x), max(x))
}

# Apply extremes() over temp with sapply()
# (each call returns a length-2 vector)
sapply(temp, extremes)
# temp is already prepared for you in the workspace

# Create a function that returns all values below zero: below_zero
below_zero <- function(x) {
  x[x < 0]
}
# below_zero(temp) alone won't work!!!

# Apply below_zero over temp using sapply(): freezing_s
freezing_s <- sapply(temp, below_zero)

# Apply below_zero over temp using lapply(): freezing_l
freezing_l <- lapply(temp, below_zero)

# Compare freezing_s to freezing_l using identical()
identical(freezing_s, freezing_l)
1
## [1] TRUE
sapply with functions that return NULL
# temp is already available in the workspace

# Write a function that 'cat()s' out the average temperatures: print_info
# Its value is whatever cat() returns, i.e. NULL.
print_info <- function(x) {
  cat("The average temperature is", mean(x), "\n")
}

# Apply print_info() over temp using lapply()
lapply(temp, print_info)
## The average temperature is 4.8
## The average temperature is 9
## The average temperature is 2.2
## The average temperature is 2.4
## The average temperature is 5.4
## The average temperature is 4.6
## The average temperature is 4.6
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
# Apply print_info() over temp using sapply()
# (temp and print_info() come from the previous snippet)
sapply(temp, print_info)
## The average temperature is 4.8
## The average temperature is 9
## The average temperature is 2.2
## The average temperature is 2.4
## The average temperature is 5.4
## The average temperature is 4.6
## The average temperature is 4.6
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
# temp is already available in the workspace

# Code the basics() function: named minimum, average and maximum of a vector
basics <- function(x) {
  c(minimum = min(x), average = mean(x), maximum = max(x))
}

# Apply basics() over temp using vapply()
# (numeric(3) declares that every call must return three numbers)
vapply(temp, basics, numeric(3))
# temp is already available in the workspace

# Definition of the basics() function: now returns four named statistics
basics <- function(x) {
  c(min = min(x), mean = mean(x), median = median(x), max = max(x))
}

# Fix the error: the template length must match the four values returned
vapply(temp, basics, numeric(4))
# temp is already defined in the workspace

# Convert to vapply() expression
vapply(temp, max, numeric(1))
1
## [1] 9 13 8 7 9 9 9
# Convert to vapply() expression
# (temp is defined in the workspace; y = 5 is forwarded to the anonymous
# function, and FUN.VALUE is named so it cannot be confused with it)
vapply(
  temp,
  function(x, y) {
    mean(x) > y
  },
  FUN.VALUE = logical(1),
  y = 5
)
1
## [1] FALSE TRUE FALSE FALSE TRUE FALSE FALSE
# Definition of get_info (don't change)
# Returns "Not too cold!" when the mean of x exceeds y, "Pretty cold!"
# otherwise.
get_info <- function(x, y) {
  if (mean(x) > y) {
    return("Not too cold!")
  } else {
    return("Pretty cold!")
  }
}

# Convert to vapply() expression
# (temp is defined in the workspace; y = 5 is forwarded to get_info)
vapply(temp, get_info, FUN.VALUE = character(1), y = 5)
12
## [1] "Pretty cold!" "Not too cold!" "Pretty cold!" "Pretty cold!"
## [5] "Not too cold!" "Pretty cold!" "Pretty cold!"
# work_todos and fun_todos have already been defined
work_todos <- c(
  "Schedule call with team",
  "Fix error in Recommendation System",
  "Respond to Marc from IT"
)
fun_todos <- c("Sleep", "Make arrangements for summer trip")

# Create a list: todos
todos <- list(work_todos, fun_todos)
todos
1234567
## [[1]]
## [1] "Schedule call with team"
## [2] "Fix error in Recommendation System"
## [3] "Respond to Marc from IT"
##
## [[2]]
## [1] "Sleep"                             "Make arrangements for summer trip"
# Sort the vectors inside todos alphabetically
# (todos is the list created in the previous snippet)
lapply(todos, sort)
1234567
## [[1]]
## [1] "Fix error in Recommendation System"
## [2] "Respond to Marc from IT"
## [3] "Schedule call with team"
##
## [[2]]
## [1] "Make arrangements for summer trip" "Sleep"
sum; calculate the sum of all the values in a data structure.
mean; calculate the arithmetic mean.
round; round the values to 0 decimal places by default. Try out ?round in the console for variations of round and ways to change
the number of digits to round to.
# The errors vector
errors <- c(1.9, -2.6, 4.0, -9.5, -3.4, 7.3)

# Sum of absolute rounded values of errors
# (round first, then take absolute values, then add up)
sum(abs(round(errors)))
seq; generate sequences, by specifying the from, to and
by arguments.
rep; replicate elements of vectors and lists.
sort; sort a vector in ascending order. Works on numerics, but
also on character strings and logicals.
rev; reverse the elements of data structures for which reversal
is defined.
str; display the structure of any R object. append; Merge vectors
or lists.
is.*; check for the class of an R object.
as.*; convert an R object from one class to another.
unlist; flatten (possibly embedded) lists to produce a vector.
# The linkedin and facebook lists
linkedin <- list(16, 9, 13, 5, 2, 17, 14)
facebook <- list(17, 7, 5, 16, 8, 13, 14)

# Convert linkedin and facebook to a vector: li_vec and fb_vec
# (unlist() alone flattens a list; as.vector() leaves a list unchanged)
li_vec <- unlist(linkedin)
fb_vec <- unlist(facebook)

# Append fb_vec to li_vec: social_vec
social_vec <- append(li_vec, fb_vec)

# Sort social_vec
sort(social_vec, decreasing = TRUE)
grepl; return TRUE when a pattern is found in the corresponding
character string.
grep; return a vector of indices of the character strings that
contains the pattern.
# The emails vector
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov", "dalai.lama@peace.org",
  "invalid.edu", "quant@bigdatacollege.edu", "cookie.monster@sesame.tv"
)

# Use grepl() to match for 'edu'
# (a plain substring match: it also hits "education@..." and "invalid.edu")
print(grepl(pattern = "edu", x = emails))
1
## [1] TRUE TRUE FALSE TRUE TRUE FALSE
# Use grep() to match for 'edu', save result to hits
# (emails is the vector defined in the previous snippet)
hits <- grep(pattern = "edu", x = emails)
hits
# The emails vector
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov", "dalai.lama@peace.org",
  "invalid.edu", "quant@bigdatacollege.edu", "cookie.monster@sesame.tv"
)

# Use grep() to match for .edu addresses more robustly:
# an "@", then anything, then a literal ".edu" at the end of the string
print(grep(pattern = "@.*\\.edu$", x = emails))
1
## [1] 1 5
# Use grepl() to match for .edu addresses more robustly, save result to hits
# (emails is the vector defined in the previous snippet)
hits <- grepl(pattern = "@.*\\.edu$", x = emails)
hits
# The emails vector
emails <- c(
  "john.doe@ivyleague.edu", "education@world.gov", "dalai.lama@peace.org",
  "invalid.edu", "quant@bigdatacollege.edu", "cookie.monster@sesame.tv"
)

# Use sub() to convert the email domains to datacamp.edu.
# The pattern consumes the "@" as well, so the replacement has to put it back;
# without it the result is "john.doedatacamp.edu", not an address.
print(sub(pattern = "@.*\\.edu$", replacement = "@datacamp.edu", x = emails))
# Get the current date: today
today <- Sys.Date()
today
1
## [1] "2017-04-14"
# See what today looks like under the hood
# (today is the Date created in the previous snippet; unclass() shows the
# plain number it is stored as)
print(unclass(today))
1
## [1] 17270
# Get the current time: now
now <- Sys.time()
now
1
## [1] "2017-04-14 08:29:36 EDT"
# See what now looks like under the hood
# (now is the POSIXct value created in the previous snippet; unclass() shows
# the plain number it is stored as)
print(unclass(now))
1
## [1] 1492172976
Create and format dates
Symbol
Meaning
Example
%d
day of the month as a number (01-31)
01-31
%a
abbreviated weekday
Mon
%A
unabbreviated weekday
Monday
%m
month as a number (01-12)
01-12
%b
abbreviated month
Jan
%B
unabbreviated month
January
%y
2-digit year
07
%Y
4-digit year
2007
%H
hours as a decimal number
23
%M
minutes as a decimal number
10
%S
seconds as a decimal number
53
%T
shorthand notation for the typical format %H:%M:%S
23:10:53
Find out more with ?strptime.
R offers built-in functions for dealing with times and dates. There are
also more convenient packages: date and lubridate.
lubridate enhances time-series packages such as zoo and xts, and
works well with dplyr for data wrangling.
library(date)

# Definition of character strings representing dates
str1 <- "May 23, 96"
str2 <- "2012-3-15"
str3 <- "30/January/2006"

# Convert the strings to dates: date1, date2, date3
# as.date() (package 'date') parses each string according to `order`.
# The result is then converted to a base Date. as.POSIXct() yields midnight
# UTC for the day, so the calendar date must be read back in UTC: printing or
# formatting that POSIXct value in a zone west of UTC shows the previous day
# (e.g. "1996-05-22 20:00:00 EDT" for May 23).
date1 <- as.date(str1, order = "mdy")
date1
## [1] 23May96
date1 <- as.Date(as.POSIXct(date1), tz = "UTC")
date1
## [1] "1996-05-23"
date2 <- as.date(str2, order = "ymd")
date2
## [1] 15Mar2012
date2 <- as.Date(as.POSIXct(date2), tz = "UTC")
date2
## [1] "2012-03-15"
date3 <- as.date(str3, order = "dmy")
date3
## [1] 30Jan2006
date3 <- as.Date(as.POSIXct(date3), tz = "UTC")
date3
## [1] "2006-01-30"

# Convert dates to formatted strings
# (weekday and month names follow the session locale; this transcript was
# produced under a French locale)
format(date1, "%A")
## [1] "jeudi"
format(date2, "%d")
## [1] "15"
format(date3, "%b %Y")
## [1] "janv. 2006"

# Convert dates to character data
strDate2 <- as.character(date2)
strDate2
## [1] "2012-03-15"
Create and format times
# Definition of character strings representing times
str1 <- "2012-3-12 14:23:08"

# Convert the string to a POSIXct object: time1
# The date-time string str1 is the one to parse. A date-only string has no
# time part, does not match the format below, and yields NA.
time1 <- as.POSIXct(str1, format = "%Y-%m-%d %H:%M:%S")

# Convert times to formatted strings
format(time1, "%M")
## [1] "23"
# %I is the hour on a 12-hour clock; the AM/PM marker produced by %p depends
# on the session locale (shown here for an English locale)
format(time1, "%I:%M %p")
## [1] "02:23 PM"
Calculations with dates
# day1, day2, day3, day4 and day5
day1 <- as.Date("2016-11-21")
day2 <- as.Date("2016-11-16")
day3 <- as.Date("2016-11-27")
day4 <- as.Date("2016-11-14")
day5 <- as.Date("2016-12-02")

# Difference between last and first pizza day
# (subtracting two Date objects gives a difftime in days)
print(day5 - day1)