initial commit v1.0
This commit is contained in:
264
R-Code/newsfokus-3-analysis.R
Normal file
264
R-Code/newsfokus-3-analysis.R
Normal file
@@ -0,0 +1,264 @@
|
||||
require(plyr)
|
||||
require(dplyr)
|
||||
require(stringr)
|
||||
|
||||
|
||||
# PREPARATIONS ------------------------------------------------------------
|
||||
|
||||
|
||||
no <- 0
|
||||
for (r in 1:nrow(cl)) {
|
||||
for (c in 3:ncol(cl)) {
|
||||
no <- no + as.numeric(cl[r,c])
|
||||
}
|
||||
}
|
||||
cat("Sum of country news entries:",no)
|
||||
rm(r,c,no)
|
||||
|
||||
# Save old cl-dataframe for next steps
|
||||
cl_bak <- cl
|
||||
|
||||
# Merge appearances for same country code (but different name)
|
||||
cl <- ddply(cl,"code",numcolwise(sum))
|
||||
target <- which(names(cl) == 'code')[1]
|
||||
cl <- cbind(cl[,1:target,drop=F], data.frame(name="PLACEHOLDER"), cl[,(target+1):length(cl),drop=F])
|
||||
rm(target)
|
||||
cl$name <- "bla"
|
||||
|
||||
# Now choose names for the according codes (first fits)
|
||||
for (r in 1:nrow(cl)) {
|
||||
code <- as.character(cl$code[r])
|
||||
code <- namibiaBug(code)
|
||||
r2 <- 1
|
||||
|
||||
repeat {
|
||||
code2 <- as.character(cl_bak$code[r2])
|
||||
code2 <- namibiaBug(code2)
|
||||
|
||||
if (code2 == code) {
|
||||
name <- as.character(cl_bak$name[r2])
|
||||
cl$name[r] <- name
|
||||
break
|
||||
}
|
||||
else {
|
||||
r2 <- r2 + 1
|
||||
}
|
||||
}
|
||||
rm(r, r2, code, code2, name)
|
||||
}
|
||||
|
||||
|
||||
# Summarize all counts for each country
|
||||
cl_stats <- cl[,1:2]
|
||||
cl_stats["overall"] <- 0
|
||||
overall <- NULL
|
||||
for (r in 1:nrow(cl)) {
|
||||
overall[r] <- 0
|
||||
for (c in 3:ncol(cl)) {
|
||||
overall[r] <- overall[r] + as.numeric(cl[r,c])
|
||||
}
|
||||
cl_stats$overall[r] <- overall[r]
|
||||
}
|
||||
rm(overall, r, c)
|
||||
|
||||
|
||||
# # If we would want the overall-counter in cl
|
||||
# cl["overall"] <- 0
|
||||
# cl$overall <- cl_stats$overall
|
||||
|
||||
|
||||
# # Get all names for one country code (if there are multiple)
|
||||
# for (r in 1:nrow(cl)) {
|
||||
# name <- NULL
|
||||
# no <- 0
|
||||
# code <- as.character(cl$code[r])
|
||||
# code <- namibiaBug(code)
|
||||
# r2 <- 1
|
||||
# repeat {
|
||||
# if (r2 > nrow(cl_bak)) {
|
||||
# break # only end if no next row
|
||||
# }
|
||||
# code2 <- as.character(cl_bak$code[r2])
|
||||
# code2 <- namibiaBug(code2)
|
||||
# if (code2 == code) {
|
||||
# no <- no + 1
|
||||
# name[no] <- as.character(cl_bak$name[r2])
|
||||
# r2 <- r2 + 1
|
||||
# }
|
||||
# else {
|
||||
# r2 <- r2 + 1
|
||||
# }
|
||||
# }
|
||||
# if (length(name) > 1) {
|
||||
# cat("For",code,"there are", no, "names:", name, "\n")
|
||||
# }
|
||||
# rm(r, name, no, code, r2, code2)
|
||||
# }
|
||||
|
||||
# Old dataframe not needed anymore
|
||||
rm(cl_bak)
|
||||
|
||||
|
||||
# Calculate the total and average news entries for each year
|
||||
years <- 2000:2014
|
||||
# Search string for str_detect for every year
|
||||
year_str <- sprintf("^\\d{1,2}\\.%s", years)
|
||||
|
||||
for (r in 1:nrow(cl)) {
|
||||
total <- 0
|
||||
average <- 0
|
||||
for (y in 1:length(years)) {
|
||||
months <- 0
|
||||
for (c in 1:ncol(cl)) {
|
||||
if (str_detect(names(cl)[c], year_str[y])) {
|
||||
total <- total + as.numeric(cl[r,c])
|
||||
months <- months + 1
|
||||
}
|
||||
}
|
||||
colnametotal <- str_c(years[y],"-total")
|
||||
colnameaverg <- str_c(years[y],"-averg")
|
||||
average <- round(total / months, 4)
|
||||
cl_stats[r, colnametotal] <- total
|
||||
cl_stats[r, colnameaverg] <- average
|
||||
total <- 0
|
||||
average <- 0
|
||||
}
|
||||
rm(r, total, average, y, months, c, colnametotal, colnameaverg)
|
||||
}
|
||||
rm(years, year_str)
|
||||
|
||||
|
||||
|
||||
|
||||
# IDENTIFY SURPRISING NEWSFOCUS -------------------------------------------
|
||||
|
||||
|
||||
# Land war 3x öfter genannt als im Monat davor, aber mehr als 50x
|
||||
no <- 0
|
||||
for (c in 4:ncol(cl)) { # starting 1 month later
|
||||
for (r in 1:nrow(cl)) {
|
||||
month <- names(cl)[c]
|
||||
|
||||
# Conditions to fulfill
|
||||
status1 <- cl[r,c] > 3 * cl[r,c-1]
|
||||
status2 <- cl[r,c] > 50
|
||||
|
||||
if (status1 && status2) {
|
||||
no <- no + 1
|
||||
cat("[",no,"] ",as.character(cl$code[r]),": 3x m-1 && >50 in: ", month,"\n", sep = "")
|
||||
}
|
||||
}
|
||||
rm(r,c,month,status1,status2)
|
||||
}
|
||||
rm(no)
|
||||
|
||||
|
||||
# Land wurde in einem Monat 3x öfter als im Jahresdurchschnitt genannt
|
||||
no <- 0
|
||||
for (c in 3:ncol(cl)) {
|
||||
for (r in 1:nrow(cl)) {
|
||||
month <- names(cl)[c]
|
||||
year <- str_extract(month, "\\d{4}")
|
||||
averg <- str_c(year,"-averg")
|
||||
averg <- cl_stats[r,averg]
|
||||
|
||||
# Conditions to fulfill
|
||||
status1 <- cl[r,c] > 3 * averg
|
||||
status2 <- cl[r,c] > 50
|
||||
|
||||
if (status1 && status2) {
|
||||
no <- no + 1
|
||||
cat("[",no,"] ",as.character(cl$code[r]),": 3x year average && >50 ", month,"\n", sep = "") }
|
||||
}
|
||||
rm(r,c,month,year,averg,status1,status2)
|
||||
}
|
||||
rm(no)
|
||||
|
||||
# Final method: Land in einem Monat öfter genannt als alle 3 Monate davor zusammen
|
||||
cl_supfoc_mon <- data.frame(code=NA,name=NA)
|
||||
cl_supfoc_mon[sprintf("%s", tspans)] <- 0
|
||||
no <- 0
|
||||
for (c in 6:ncol(cl)) {
|
||||
for (r in 1:nrow(cl)) {
|
||||
month <- names(cl)[c]
|
||||
code <- as.character(cl$code[r])
|
||||
name <- as.character(cl$name[r])
|
||||
|
||||
# Conditions to fulfill
|
||||
status1 <- cl[r,c] > cl[r,c-1]+cl[r,c-2]+cl[r,c-3]
|
||||
status2 <- cl[r,c] > 50
|
||||
|
||||
if (status1 && status2) {
|
||||
no <- no + 1
|
||||
cl_supfoc_mon[no, "code"] <- code
|
||||
cl_supfoc_mon$name[no] <- name
|
||||
#if (is.null(cl_supfoc_mon[no, month])) { cl_supfoc_mon[no, month] <- 0}
|
||||
#cl_supfoc_mon[no, month] <- cl_supfoc_mon[no, month] + 1
|
||||
cl_supfoc_mon[no, month] <- 1
|
||||
cat("[",no,"] ",as.character(cl$code[r]),": >m-(1:3) && >50 ", month,"\n", sep = "")
|
||||
}
|
||||
}
|
||||
rm(r,c,month,status1,status2)
|
||||
}
|
||||
rm(no, code, name)
|
||||
|
||||
# Clean cl_supfoc_mon: Replace NAs by 0, and sum up multiple appeared countries
|
||||
cl_supfoc_mon[is.na(cl_supfoc_mon)] <- 0
|
||||
cl_supfoc_mon <- ddply(cl_supfoc_mon,c("code", "name"),numcolwise(sum))
|
||||
|
||||
# Delete all month-columns with 0 surprising events
|
||||
cl_supfoc_only_mon <- removeZeroMonths(cl_supfoc_mon, 3, ncol(cl_supfoc_mon))
|
||||
|
||||
|
||||
|
||||
|
||||
# Get total surprising newsfocuses for each country
|
||||
cl_supfoc_total <- data.frame(code=NA, name=NA, total=NA)
|
||||
for (r in 1:nrow(cl_supfoc_mon)) {
|
||||
total <- 0
|
||||
cl_supfoc_total[r,"code"] <- as.character(cl_supfoc_mon$code[r])
|
||||
cl_supfoc_total[r,"name"] <- as.character(cl_supfoc_mon$name[r])
|
||||
for (c in 3:ncol(cl_supfoc_mon)) {
|
||||
total <- total + as.numeric(cl_supfoc_mon[r,c])
|
||||
}
|
||||
cl_supfoc_total[r,"total"] <- total
|
||||
}
|
||||
rm(r, total, c)
|
||||
|
||||
|
||||
|
||||
|
||||
# Total highlights per month + turn around table for graphs
|
||||
cl_supfoc_mon["highlight"] <- 1
|
||||
cl_supfoc_turn_mon <- ddply(cl_supfoc_mon,"highlight", numcolwise(sum))
|
||||
cl_supfoc_mon$highlight <- NULL
|
||||
cl_supfoc_turn_mon$highlight <- NULL
|
||||
cl_supfoc_turn_mon <- data.frame(month = names(cl_supfoc_turn_mon), highs = as.numeric(cl_supfoc_turn_mon[1,]))
|
||||
# Convert %d.%y to valid date class
|
||||
months <- NULL
|
||||
for (m in 1:length(tspans)) {
|
||||
dates <- str_c("15.",tspans[m])
|
||||
months[m] <- dates
|
||||
}
|
||||
rm(m, dates)
|
||||
cl_supfoc_turn_mon$month <- as.Date(months, format = "%d.%m.%Y")
|
||||
rm(months)
|
||||
|
||||
# Delete all month-rows with 0 surprising events
|
||||
cl_supfoc_turn_only_mon <- cl_supfoc_turn_mon[!cl_supfoc_turn_mon$highs == 0,]
|
||||
rownames(cl_supfoc_turn_only_mon) <- NULL
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# # Replace 0s by NAs
|
||||
# for (r in 1:180) {
|
||||
# if (! is.na(cl_total2$highs[r])) {
|
||||
# if (cl_total2$highs[r] == 0) {
|
||||
# cl_total2$highs[r] <- NA
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
|
||||
Reference in New Issue
Block a user