265 lines
6.5 KiB
R
265 lines
6.5 KiB
R
require(plyr)
|
|
require(dplyr)
|
|
require(stringr)
|
|
|
|
|
|
# PREPARATIONS ------------------------------------------------------------
|
|
|
|
|
|
no <- 0
|
|
for (r in 1:nrow(cl)) {
|
|
for (c in 3:ncol(cl)) {
|
|
no <- no + as.numeric(cl[r,c])
|
|
}
|
|
}
|
|
cat("Sum of country news entries:",no)
|
|
rm(r,c,no)
|
|
|
|
# Save old cl-dataframe for next steps
|
|
cl_bak <- cl
|
|
|
|
# Merge appearances for same country code (but different name)
|
|
cl <- ddply(cl,"code",numcolwise(sum))
|
|
target <- which(names(cl) == 'code')[1]
|
|
cl <- cbind(cl[,1:target,drop=F], data.frame(name="PLACEHOLDER"), cl[,(target+1):length(cl),drop=F])
|
|
rm(target)
|
|
cl$name <- "bla"
|
|
|
|
# Now choose names for the according codes (first fits)
|
|
for (r in 1:nrow(cl)) {
|
|
code <- as.character(cl$code[r])
|
|
code <- namibiaBug(code)
|
|
r2 <- 1
|
|
|
|
repeat {
|
|
code2 <- as.character(cl_bak$code[r2])
|
|
code2 <- namibiaBug(code2)
|
|
|
|
if (code2 == code) {
|
|
name <- as.character(cl_bak$name[r2])
|
|
cl$name[r] <- name
|
|
break
|
|
}
|
|
else {
|
|
r2 <- r2 + 1
|
|
}
|
|
}
|
|
rm(r, r2, code, code2, name)
|
|
}
|
|
|
|
|
|
# Summarize all counts for each country
|
|
cl_stats <- cl[,1:2]
|
|
cl_stats["overall"] <- 0
|
|
overall <- NULL
|
|
for (r in 1:nrow(cl)) {
|
|
overall[r] <- 0
|
|
for (c in 3:ncol(cl)) {
|
|
overall[r] <- overall[r] + as.numeric(cl[r,c])
|
|
}
|
|
cl_stats$overall[r] <- overall[r]
|
|
}
|
|
rm(overall, r, c)
|
|
|
|
|
|
# # If we would want the overall-counter in cl
|
|
# cl["overall"] <- 0
|
|
# cl$overall <- cl_stats$overall
|
|
|
|
|
|
# # Get all names for one country code (if there are multiple)
|
|
# for (r in 1:nrow(cl)) {
|
|
# name <- NULL
|
|
# no <- 0
|
|
# code <- as.character(cl$code[r])
|
|
# code <- namibiaBug(code)
|
|
# r2 <- 1
|
|
# repeat {
|
|
# if (r2 > nrow(cl_bak)) {
|
|
# break # only end if no next row
|
|
# }
|
|
# code2 <- as.character(cl_bak$code[r2])
|
|
# code2 <- namibiaBug(code2)
|
|
# if (code2 == code) {
|
|
# no <- no + 1
|
|
# name[no] <- as.character(cl_bak$name[r2])
|
|
# r2 <- r2 + 1
|
|
# }
|
|
# else {
|
|
# r2 <- r2 + 1
|
|
# }
|
|
# }
|
|
# if (length(name) > 1) {
|
|
# cat("For",code,"there are", no, "names:", name, "\n")
|
|
# }
|
|
# rm(r, name, no, code, r2, code2)
|
|
# }
|
|
|
|
# Old dataframe not needed anymore
|
|
rm(cl_bak)
|
|
|
|
|
|
# Calculate the total and average news entries for each year
|
|
years <- 2000:2014
|
|
# Search string for str_detect for every year
|
|
year_str <- sprintf("^\\d{1,2}\\.%s", years)
|
|
|
|
for (r in 1:nrow(cl)) {
|
|
total <- 0
|
|
average <- 0
|
|
for (y in 1:length(years)) {
|
|
months <- 0
|
|
for (c in 1:ncol(cl)) {
|
|
if (str_detect(names(cl)[c], year_str[y])) {
|
|
total <- total + as.numeric(cl[r,c])
|
|
months <- months + 1
|
|
}
|
|
}
|
|
colnametotal <- str_c(years[y],"-total")
|
|
colnameaverg <- str_c(years[y],"-averg")
|
|
average <- round(total / months, 4)
|
|
cl_stats[r, colnametotal] <- total
|
|
cl_stats[r, colnameaverg] <- average
|
|
total <- 0
|
|
average <- 0
|
|
}
|
|
rm(r, total, average, y, months, c, colnametotal, colnameaverg)
|
|
}
|
|
rm(years, year_str)
|
|
|
|
|
|
|
|
|
|
# IDENTIFY SURPRISING NEWSFOCUS -------------------------------------------
|
|
|
|
|
|
# Land war 3x öfter genannt als im Monat davor, aber mehr als 50x
|
|
no <- 0
|
|
for (c in 4:ncol(cl)) { # starting 1 month later
|
|
for (r in 1:nrow(cl)) {
|
|
month <- names(cl)[c]
|
|
|
|
# Conditions to fulfill
|
|
status1 <- cl[r,c] > 3 * cl[r,c-1]
|
|
status2 <- cl[r,c] > 50
|
|
|
|
if (status1 && status2) {
|
|
no <- no + 1
|
|
cat("[",no,"] ",as.character(cl$code[r]),": 3x m-1 && >50 in: ", month,"\n", sep = "")
|
|
}
|
|
}
|
|
rm(r,c,month,status1,status2)
|
|
}
|
|
rm(no)
|
|
|
|
|
|
# Land wurde in einem Monat 3x öfter als im Jahresdurchschnitt genannt
|
|
no <- 0
|
|
for (c in 3:ncol(cl)) {
|
|
for (r in 1:nrow(cl)) {
|
|
month <- names(cl)[c]
|
|
year <- str_extract(month, "\\d{4}")
|
|
averg <- str_c(year,"-averg")
|
|
averg <- cl_stats[r,averg]
|
|
|
|
# Conditions to fulfill
|
|
status1 <- cl[r,c] > 3 * averg
|
|
status2 <- cl[r,c] > 50
|
|
|
|
if (status1 && status2) {
|
|
no <- no + 1
|
|
cat("[",no,"] ",as.character(cl$code[r]),": 3x year average && >50 ", month,"\n", sep = "") }
|
|
}
|
|
rm(r,c,month,year,averg,status1,status2)
|
|
}
|
|
rm(no)
|
|
|
|
# Final method: Land in einem Monat öfter genannt als alle 3 Monate davor zusammen
|
|
cl_supfoc_mon <- data.frame(code=NA,name=NA)
|
|
cl_supfoc_mon[sprintf("%s", tspans)] <- 0
|
|
no <- 0
|
|
for (c in 6:ncol(cl)) {
|
|
for (r in 1:nrow(cl)) {
|
|
month <- names(cl)[c]
|
|
code <- as.character(cl$code[r])
|
|
name <- as.character(cl$name[r])
|
|
|
|
# Conditions to fulfill
|
|
status1 <- cl[r,c] > cl[r,c-1]+cl[r,c-2]+cl[r,c-3]
|
|
status2 <- cl[r,c] > 50
|
|
|
|
if (status1 && status2) {
|
|
no <- no + 1
|
|
cl_supfoc_mon[no, "code"] <- code
|
|
cl_supfoc_mon$name[no] <- name
|
|
#if (is.null(cl_supfoc_mon[no, month])) { cl_supfoc_mon[no, month] <- 0}
|
|
#cl_supfoc_mon[no, month] <- cl_supfoc_mon[no, month] + 1
|
|
cl_supfoc_mon[no, month] <- 1
|
|
cat("[",no,"] ",as.character(cl$code[r]),": >m-(1:3) && >50 ", month,"\n", sep = "")
|
|
}
|
|
}
|
|
rm(r,c,month,status1,status2)
|
|
}
|
|
rm(no, code, name)
|
|
|
|
# Clean cl_supfoc_mon: Replace NAs by 0, and sum up multiple appeared countries
|
|
cl_supfoc_mon[is.na(cl_supfoc_mon)] <- 0
|
|
cl_supfoc_mon <- ddply(cl_supfoc_mon,c("code", "name"),numcolwise(sum))
|
|
|
|
# Delete all month-columns with 0 surprising events
|
|
cl_supfoc_only_mon <- removeZeroMonths(cl_supfoc_mon, 3, ncol(cl_supfoc_mon))
|
|
|
|
|
|
|
|
|
|
# Get total surprising newsfocuses for each country
|
|
cl_supfoc_total <- data.frame(code=NA, name=NA, total=NA)
|
|
for (r in 1:nrow(cl_supfoc_mon)) {
|
|
total <- 0
|
|
cl_supfoc_total[r,"code"] <- as.character(cl_supfoc_mon$code[r])
|
|
cl_supfoc_total[r,"name"] <- as.character(cl_supfoc_mon$name[r])
|
|
for (c in 3:ncol(cl_supfoc_mon)) {
|
|
total <- total + as.numeric(cl_supfoc_mon[r,c])
|
|
}
|
|
cl_supfoc_total[r,"total"] <- total
|
|
}
|
|
rm(r, total, c)
|
|
|
|
|
|
|
|
|
|
# Total highlights per month + turn around table for graphs
|
|
cl_supfoc_mon["highlight"] <- 1
|
|
cl_supfoc_turn_mon <- ddply(cl_supfoc_mon,"highlight", numcolwise(sum))
|
|
cl_supfoc_mon$highlight <- NULL
|
|
cl_supfoc_turn_mon$highlight <- NULL
|
|
cl_supfoc_turn_mon <- data.frame(month = names(cl_supfoc_turn_mon), highs = as.numeric(cl_supfoc_turn_mon[1,]))
|
|
# Convert %d.%y to valid date class
|
|
months <- NULL
|
|
for (m in 1:length(tspans)) {
|
|
dates <- str_c("15.",tspans[m])
|
|
months[m] <- dates
|
|
}
|
|
rm(m, dates)
|
|
cl_supfoc_turn_mon$month <- as.Date(months, format = "%d.%m.%Y")
|
|
rm(months)
|
|
|
|
# Delete all month-rows with 0 surprising events
|
|
cl_supfoc_turn_only_mon <- cl_supfoc_turn_mon[!cl_supfoc_turn_mon$highs == 0,]
|
|
rownames(cl_supfoc_turn_only_mon) <- NULL
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # Replace 0s by NAs
|
|
# for (r in 1:180) {
|
|
# if (! is.na(cl_total2$highs[r])) {
|
|
# if (cl_total2$highs[r] == 0) {
|
|
# cl_total2$highs[r] <- NA
|
|
# }
|
|
# }
|
|
# }
|
|
|