# Hi, I have tried to solve assignment 3. Please help me with the last part, where we need to find how many planes flew to only one destination.
# Questions for the apply and dplyr families ----
# Quick look at the built-in iris data: first rows, help page, structure.
head(iris)
help("iris")
str(iris)
# Find the sum of each column and confirm whether the sum is greater than 800.
# Only the four measurement columns (1 to 4) are summed; which() reports the
# name and position of every column whose total exceeds 800.
which(vapply(iris[, 1:4], sum, numeric(1)) > 800)
# Hint: create a custom function that sums its input and compares the total
# with 800, then apply that function to each numerical column.

# fn: TRUE when the elements of `data` add up to more than 800, else FALSE.
#   data: a numeric vector (here, one measurement column of iris).
fn <- function(data) {
  total <- sum(data)
  total > 800
}
# One logical per measurement column, named after the column.
vapply(iris[, 1:4], fn, logical(1))
# Find the sum / mean / median of Sepal Length species-wise.
# For each species, in factor-level order, the three statistics are printed
# one per line: sum, then mean, then median.
for (species_name in levels(iris$Species)) {
  sepal_lengths <- iris$Sepal.Length[iris$Species == species_name]
  for (statistic in list(sum, mean, median)) {
    print(statistic(sepal_lengths))
  }
}
# For all the flowers having sepal width > 3.0, find the number of flowers in
# each species. The result is a one-dimensional table keyed by species.
with(iris, table(Species[Sepal.Width > 3.0]))
# Count how many different petal widths there are in each species.
# One count is printed per species, in factor-level order.
for (species_name in levels(iris$Species)) {
  petal_widths <- iris$Petal.Width[iris$Species == species_name]
  print(length(unique(petal_widths)))
}
# Solution with dplyr: number of distinct petal widths per species.
# dplyr is attached before its help page is requested, because the help lookup
# only searches attached packages by default.
library(dplyr)
?dplyr
# One row per species; the count gets an explicit column name so the result is
# easy to refer to.
iris %>%
  group_by(Species) %>%
  summarise(n_petal_widths = n_distinct(Petal.Width))
# Titanic data set ----
# NOTE(review): the working directory below is specific to one machine; the
# CSV file is looked up relative to it.
setwd("C:/Users/Alaska/Desktop/Simplilearn/Datascience with R/Live class/practicse")
# Empty strings, "?" and "T" are read in as missing values.
train <- read.csv("titanic_train.csv", na.strings = c("", "?", "T"))

# survival_share: sum of `x` divided by its length, rounded to two decimals.
#   x: the Survived values of one group (presumably coded 0/1, so the result
#      is the proportion of survivors; verify against the CSV).
survival_share <- function(x) {
  round(sum(x) / length(x), digits = 2)
}
# Q. Find the proportion of males and of females who survived
aggregate(Survived ~ Sex, data = train, FUN = survival_share)
# Q. Find the proportion of people who survived, passenger class wise
aggregate(Survived ~ Pclass, data = train, FUN = survival_share)
# Q. Find the proportion of people survived passenger class wise and sex wise
aggregate(Survived ~ Pclass + Sex, data = train, FUN = survival_share)
# Q. Strip off the title from the names of the passengers, and create a
# separate column named Title.
# Q. Ultimately we want only 4 levels in this column: Miss, Mr, Mrs and Master.

# The Title column must exist before it can be recoded, so it is extracted
# from Name first. The pattern keeps the text between the first comma and the
# next full stop.
# NOTE(review): assumes Name looks like "Surname, Title. Given names"; confirm
# against the CSV.
train$Title <- trimws(
  sub("^[^,]*,\\s*([^.]*)\\..*$", "\\1", as.character(train$Name))
)

# which() drops NA comparisons (for example a missing Sex), which would
# otherwise make the indexed assignment fail.
# Rare female honorifics that this exercise folds into "Miss".
is_rare_miss <- train$Title %in% c("Lady", "the Countess", "Mlle")
train$Title[which(is_rare_miss)] <- "Miss"

# Rare titles held by male passengers become "Mr".
rare_male_titles <- c(
  "Capt", "Don", "Major", "Sir", "Col", "Jonkheer", "Rev", "Dr"
)
is_rare_mr <- train$Title %in% rare_male_titles & train$Sex == "male"
train$Title[which(is_rare_mr)] <- "Mr"

# Remaining female variants become "Mrs" ("Lady" is already handled above).
is_rare_mrs <- train$Title %in% c("Dona", "Dr", "Mme", "Ms") &
  train$Sex == "female"
train$Title[which(is_rare_mrs)] <- "Mrs"

# Store the result as a factor and tabulate it to check that only the four
# expected levels remain.
train$Title <- factor(train$Title)
?table
table(train$Title, useNA = "ifany")
# hflights ----
# Install the data package only when it is missing, and do so before attaching
# it: library() fails when the package has not been installed yet.
if (!requireNamespace("hflights", quietly = TRUE)) {
  install.packages("hflights")
}
library(hflights)
str(hflights)
# How many flights are not cancelled? Hint: use var cancellation code
# Breakdown of the cancellation codes, including blank or missing ones. A
# tabulation is shown instead of printing the whole column.
table(hflights$CancellationCode, useNA = "ifany")
# Indexing that table with "C" counts a single cancellation code, not the
# flights that went ahead.
# NOTE(review): assumes a flight that was not cancelled has a blank or NA
# cancellation code; check against the tabulation above.
sum(hflights$CancellationCode %in% c("", NA))
# Combine year, month and day variables to create a date column.
# The text has the form day/month/year. Month has to be the middle component;
# repeating DayofMonth there produces impossible values such as "31/31/...".
hflights$Date <- paste(
  hflights$DayofMonth, hflights$Month, hflights$Year,
  sep = "/"
)
# Show only the first few values rather than the whole column.
head(hflights$Date)
# Find the maximum AirTime for all flights whose Departure delay is not NA.
# Missing AirTime values among those flights are ignored.
with(hflights, max(AirTime[!is.na(DepDelay)], na.rm = TRUE))
# Find per-carrier mean of arrival delays and arrange them in increasing /
# decreasing order.
# Number of flights per carrier, for reference.
table(hflights$UniqueCarrier, useNA = "ifany")
# Columns are referred to by bare name inside the dplyr verbs: writing
# hflights$ArrDelay there would bypass the grouping and use the whole column
# for every carrier. The mean is taken inside summarise(), because mean()
# cannot be applied to a grouped data frame directly.
carrier_delays <- hflights %>%
  group_by(UniqueCarrier) %>%
  summarise(mean_arr_delay = mean(ArrDelay, na.rm = TRUE))
# Increasing order of mean arrival delay.
carrier_delays %>% arrange(mean_arr_delay)
# Decreasing order of mean arrival delay.
carrier_delays %>% arrange(desc(mean_arr_delay))
# How many airplanes only flew to one destination from Houston?
# Hint: each tail number represents 1 airplane.
# Count the distinct destinations per tail number and keep the airplanes with
# exactly one. Counting rows per tail number would instead find airplanes that
# made a single flight, which is a different question.
# NOTE(review): no Origin filter is applied, on the assumption that every row
# of hflights departs from a Houston airport (IAH or HOU); confirm with
# ?hflights.
# Rows without a tail number cannot be attributed to an airplane and are
# left out.
dests_per_plane <- hflights %>%
  filter(!is.na(TailNum), TailNum != "") %>%
  group_by(TailNum) %>%
  summarise(n_dest = n_distinct(Dest))
# Number of airplanes with exactly one destination.
sum(dests_per_plane$n_dest == 1)
# Two further ways of computing the per-carrier mean arrival delay.
# NOTE(review): ArrDelay is used on the assumption that these lines answer the
# "per-carrier mean of arrival delays" question; the earlier version averaged
# ArrTime. Confirm which column was intended.
# dplyr without the pipe; summarise() with a named expression replaces the
# deprecated funs() helper.
summarise(
  group_by(hflights, UniqueCarrier),
  ArrDelay = mean(ArrDelay, na.rm = TRUE)
)
# Base R equivalent. na.rm = TRUE is forwarded to mean(); without it, any
# carrier with a missing value in the column gets NA as its mean.
aggregate(
  hflights$ArrDelay,
  by = list(UniqueCarrier = hflights$UniqueCarrier),
  FUN = mean,
  na.rm = TRUE
)