library(plyr)
library(dplyr)
library(stringr)

# PREPARATIONS ------------------------------------------------------------

# Inputs that are defined outside this section (not visible here): the data
# frame `cl`, the label vector `tspans`, and the helper functions
# namibiaBug() and removeZeroMonths().
# NOTE(review): the code below assumes that columns 1-2 of `cl` are the
# country code/name and that every column from 3 onwards holds the counts of
# one month, named "<month>.<year>" -- confirm against the loader.

# Row-wise sum over the given columns of `df`. Every column is passed through
# as.numeric() and the columns are added left to right starting from 0, so an
# empty column selection yields a vector of zeros.
sum_count_cols <- function(df, cols) {
  Reduce(`+`, lapply(df[cols], as.numeric), numeric(nrow(df)))
}

# Grand total of all counts, printed as a sanity check
cat("Sum of country news entries:", sum(sum_count_cols(cl, 3:ncol(cl))))

# Save old cl-dataframe for next steps
cl_bak <- cl

# Merge appearances for same country code (but different name) and re-attach
# one name per code: the name of the first row of the unmerged data whose code
# matches (first fit).
cl <- local({
  merged <- ddply(cl, "code", numcolwise(sum))

  # namibiaBug() is called once per element, exactly as the original loop
  # did. NOTE(review): presumably it protects the code "NA" from being
  # treated as a missing value -- verify against its definition.
  merged_codes <- unlist(lapply(as.character(merged$code), namibiaBug))
  source_codes <- unlist(lapply(as.character(cl_bak$code), namibiaBug))
  if (length(merged_codes) != nrow(merged) ||
      length(source_codes) != nrow(cl_bak)) {
    stop("namibiaBug() must return exactly one value per code",
         call. = FALSE)
  }

  # match() returns the first matching position and is bounded by
  # nrow(cl_bak); the former repeat-loop ran past the end of cl_bak when a
  # code had no match.
  first_fit <- match(merged_codes, source_codes)
  if (anyNA(first_fit)) {
    stop("No name found for code(s): ",
         paste(merged_codes[is.na(first_fit)], collapse = ", "),
         call. = FALSE)
  }

  # Insert the name column directly after the code column
  code_pos <- which(names(merged) == "code")[1]
  cbind(
    merged[, seq_len(code_pos), drop = FALSE],
    data.frame(name = as.character(cl_bak$name)[first_fit],
               stringsAsFactors = FALSE),
    merged[, (code_pos + 1):ncol(merged), drop = FALSE]
  )
})

# Summarize all counts for each country
cl_stats <- cl[, 1:2]
cl_stats$overall <- sum_count_cols(cl, 3:ncol(cl))

# # If we would want the overall-counter in cl
# cl["overall"] <- 0
# cl$overall <- cl_stats$overall

# # Get all names for one country code (if there are multiple)
# for (r in 1:nrow(cl)) {
#   name <- NULL
#   no <- 0
#   code <- as.character(cl$code[r])
#   code <- namibiaBug(code)
#   r2 <- 1
#   repeat {
#     if (r2 > nrow(cl_bak)) {
#       break # only end if no next row
#     }
#     code2 <- as.character(cl_bak$code[r2])
#     code2 <- namibiaBug(code2)
#     if (code2 == code) {
#       no <- no + 1
#       name[no] <- as.character(cl_bak$name[r2])
#       r2 <- r2 + 1
#     }
#     else {
#       r2 <- r2 + 1
#     }
#   }
#   if (length(name) > 1) {
#     cat("For",code,"there are", no, "names:", name, "\n")
#   }
#   rm(r, name, no, code, r2, code2)
# }

# Old dataframe not needed anymore
rm(cl_bak)

# Calculate the total and average news entries for each year. For every year
# two columns are appended to cl_stats: "<year>-total" and "<year>-averg"
# (total divided by the number of month columns found, rounded to 4 digits).
cl_stats <- local({
  stats <- cl_stats
  years <- 2000:2014
  # Search string for str_detect for every year
  year_str <- sprintf("^\\d{1,2}\\.%s", years)
  for (y in seq_along(years)) {
    year_cols <- which(str_detect(names(cl), year_str[y]))
    total <- sum_count_cols(cl, year_cols)
    stats[[str_c(years[y], "-total")]] <- total
    # A year without any month column gives 0 / 0 = NaN, as before
    stats[[str_c(years[y], "-averg")]] <- round(total / length(year_cols), 4)
  }
  stats
})

# IDENTIFY SURPRISING NEWSFOCUS -------------------------------------------

# In all three heuristics below a comparison that evaluates to NA is treated
# as "not surprising" (which() drops it); previously an NA aborted the script
# inside if ().

# Heuristic 1: country was mentioned more than 3x as often as in the month
# before, and more than 50 times. Only reports to the console.
local({
  no <- 0
  for (month_col in seq_len(ncol(cl))[-(1:3)]) { # starting 1 month later
    month <- names(cl)[month_col]
    counts <- cl[[month_col]]
    hits <- which(counts > 3 * cl[[month_col - 1]] & counts > 50)
    for (hit_row in hits) {
      no <- no + 1
      cat("[", no, "] ", as.character(cl$code[hit_row]),
          ": 3x m-1 && >50 in: ", month, "\n", sep = "")
    }
  }
  invisible(NULL)
})

# Heuristic 2: country was mentioned more than 3x as often in one month as
# its average for that year, and more than 50 times. Only reports to the
# console.
local({
  no <- 0
  for (month_col in seq_len(ncol(cl))[-(1:2)]) {
    month <- names(cl)[month_col]
    year <- str_extract(month, "\\d{4}")
    # `[, name]` (not `[[name]]`) so that a missing "<year>-averg" column
    # still raises an error instead of being skipped silently
    year_avg <- cl_stats[, str_c(year, "-averg")]
    counts <- cl[[month_col]]
    hits <- which(counts > 3 * year_avg & counts > 50)
    for (hit_row in hits) {
      no <- no + 1
      cat("[", no, "] ", as.character(cl$code[hit_row]),
          ": 3x year average && >50 ", month, "\n", sep = "")
    }
  }
  invisible(NULL)
})

# Final method: country was mentioned more often in one month than in the
# three preceding months combined, and more than 50 times. Every hit becomes
# one row (code, name, 1 in the column of that month).
cl_supfoc_mon <- local({
  flagged <- data.frame(code = NA, name = NA)
  flagged[sprintf("%s", tspans)] <- 0
  no <- 0
  for (month_col in seq_len(ncol(cl))[-(1:5)]) { # needs 3 preceding months
    month <- names(cl)[month_col]
    counts <- cl[[month_col]]
    previous3 <- cl[[month_col - 1]] + cl[[month_col - 2]] +
      cl[[month_col - 3]]
    hits <- which(counts > previous3 & counts > 50)
    for (hit_row in hits) {
      no <- no + 1
      # The first hit overwrites the initial NA row; every further hit
      # appends a row whose remaining cells are NA until the clean-up below.
      flagged[no, "code"] <- as.character(cl$code[hit_row])
      flagged$name[no] <- as.character(cl$name[hit_row])
      flagged[no, month] <- 1
      cat("[", no, "] ", as.character(cl$code[hit_row]),
          ": >m-(1:3) && >50 ", month, "\n", sep = "")
    }
  }
  flagged
})

# Clean cl_supfoc_mon: Replace NAs by 0, and sum up multiple appeared countries
cl_supfoc_mon[is.na(cl_supfoc_mon)] <- 0
cl_supfoc_mon <- ddply(cl_supfoc_mon, c("code", "name"), numcolwise(sum))

# Delete all month-columns with 0 surprising events
cl_supfoc_only_mon <- removeZeroMonths(cl_supfoc_mon, 3, ncol(cl_supfoc_mon))

# Get total surprising newsfocuses for each country
cl_supfoc_total <- data.frame(
  code = as.character(cl_supfoc_mon$code),
  name = as.character(cl_supfoc_mon$name),
  total = sum_count_cols(cl_supfoc_mon, 3:ncol(cl_supfoc_mon)),
  stringsAsFactors = FALSE
)
rm(sum_count_cols)

# Total highlights per month + turn around table for graphs: one row per
# month with the number of surprising events summed over all countries.
cl_supfoc_turn_mon <- local({
  # Same column selection as numcolwise(): numeric columns only
  month_cols <- Filter(is.numeric, cl_supfoc_mon)
  highs <- vapply(month_cols, function(x) as.numeric(sum(x)), numeric(1))
  # Convert the "%m.%Y" labels to a valid date class (15th of each month).
  # NOTE(review): the dates are built from `tspans`, not from the column
  # names, so `tspans` must list the month columns in the same order.
  data.frame(
    month = as.Date(str_c("15.", tspans), format = "%d.%m.%Y"),
    highs = unname(highs)
  )
})

# Delete all month-rows with 0 surprising events
cl_supfoc_turn_only_mon <-
  cl_supfoc_turn_mon[cl_supfoc_turn_mon$highs != 0, ]
rownames(cl_supfoc_turn_only_mon) <- NULL

# # Replace 0s by NAs
# for (r in 1:180) {
#   if (! is.na(cl_total2$highs[r])) {
#     if (cl_total2$highs[r] == 0) {
#       cl_total2$highs[r] <- NA
#     }
#   }
# }