initial commit v1.0

2014-11-28 18:05:12 +01:00
commit ecd3d5214d
21 changed files with 1709 additions and 0 deletions
--- a/R-Code/newsfokus-3-analysis.R
+++ b/R-Code/newsfokus-3-analysis.R
@@ -0,0 +1,264 @@
+require(plyr)
+require(dplyr)
+require(stringr)
+
+
+# PREPARATIONS ------------------------------------------------------------
+
+
+# Add up every news-entry count in cl. Columns 1-2 are left out; the counts
+# are read from column 3 onwards.
+# seq_len() yields an empty selection when cl has fewer than 3 columns,
+# whereas 3:ncol(cl) would count downwards.
+count_cols <- seq_len(ncol(cl))[-(1:2)]
+no <- sum(vapply(cl[, count_cols, drop = FALSE],
+                 function(counts) sum(as.numeric(counts)),
+                 numeric(1)))
+cat("Sum of country news entries:",no)
+rm(count_cols, no)
+
+# Save old cl-dataframe for next steps
+cl_bak <- cl
+
+# Merge appearances for same country code (but different name): the numeric
+# columns are summed within each code
+cl <- ddply(cl, "code", numcolwise(sum))
+
+# Re-insert a "name" column directly after "code". A logical mask is used
+# instead of 1:target / (target+1):length(cl), so the split also works when
+# "code" is the last column.
+target <- which(names(cl) == "code")[1]
+before_name <- seq_len(ncol(cl)) <= target
+cl <- cbind(cl[, before_name, drop = FALSE],
+            data.frame(name = "PLACEHOLDER"),
+            cl[, !before_name, drop = FALSE])
+rm(target, before_name)
+# Overwrite the placeholder with a plain character value
+cl$name <- "bla"
+
+# Now choose names for the according codes (first fits): every row of cl gets
+# the name of the first cl_bak row that carries the same code
+for (r in seq_len(nrow(cl))) {
+  code <- namibiaBug(as.character(cl$code[r]))
+  found <- FALSE
+
+  # Bounded scan: the former open-ended repeat ran past the last row of
+  # cl_bak whenever a code had no counterpart there
+  for (r2 in seq_len(nrow(cl_bak))) {
+    code2 <- namibiaBug(as.character(cl_bak$code[r2]))
+    if (code2 == code) {
+      cl$name[r] <- as.character(cl_bak$name[r2])
+      found <- TRUE
+      break
+    }
+  }
+
+  if (!found) {
+    stop("No entry in cl_bak for country code: ", code, call. = FALSE)
+  }
+}
+# Remove only those helpers that were actually created (none if cl is empty)
+rm(list = intersect(c("r", "r2", "code", "code2", "found"), ls()))
+
+
+# Summarize all counts for each country: cl_stats keeps the first two columns
+# of cl plus "overall", the row-wise sum of all remaining columns
+cl_stats <- cl[,1:2]
+# Empty selection (instead of a downward 3:ncol(cl)) if there is no count column
+count_cols <- seq_len(ncol(cl))[-(1:2)]
+# Start from 0 for every row and add one whole column at a time
+cl_stats["overall"] <- Reduce("+",
+                              lapply(cl[count_cols], as.numeric),
+                              numeric(nrow(cl)))
+rm(count_cols)
+
+
+# # If we would want the overall-counter in cl
+# cl["overall"] <- 0
+# cl$overall <- cl_stats$overall
+
+
+# # Get all names for one country code (if there are multiple)
+# for (r in 1:nrow(cl)) {
+#   name <- NULL
+#   no <- 0
+#   code <- as.character(cl$code[r])
+#   code <- namibiaBug(code)
+#   r2 <- 1
+#   repeat {
+#     if (r2 > nrow(cl_bak)) {
+#       break  # only end if no next row
+#     }
+#     code2 <- as.character(cl_bak$code[r2])
+#     code2 <- namibiaBug(code2)
+#     if (code2 == code) {
+#       no <- no + 1
+#       name[no] <- as.character(cl_bak$name[r2])
+#       r2 <- r2 + 1
+#     }
+#     else {
+#       r2 <- r2 + 1
+#     }
+#   }
+#   if (length(name) > 1) {
+#     cat("For",code,"there are", no, "names:", name, "\n")
+#   }
+#   rm(r, name, no, code, r2, code2)
+# }
+
+# The backup copy of cl has served its purpose; drop it from the workspace
+rm(list = "cl_bak")
+
+
+# Calculate the total and average news entries for each year and store them
+# in cl_stats as "<year>-total" and "<year>-averg"
+years <- 2000:2014
+# Search string for str_detect for every year
+year_str <- sprintf("^\\d{1,2}\\.%s", years)
+
+for (y in seq_along(years)) {
+  # Columns of cl that belong to this year; matched once per year instead of
+  # once per row
+  year_cols <- which(str_detect(names(cl), year_str[y]))
+
+  # Row-wise sum over the columns of this year, for all countries at once
+  total <- Reduce("+", lapply(cl[year_cols], as.numeric), numeric(nrow(cl)))
+
+  cl_stats[str_c(years[y],"-total")] <- total
+  # A year without any matching column divides 0 by 0 and yields NaN
+  cl_stats[str_c(years[y],"-averg")] <- round(total / length(year_cols), 4)
+}
+rm(y, year_cols, total)
+rm(years, year_str)
+
+
+
+
+# IDENTIFY SURPRISING NEWSFOCUS -------------------------------------------
+
+
+# Country was mentioned more than 3x as often as in the month before, and
+# more than 50x
+no <- 0
+# Starting 1 month later, so that every inspected column has a predecessor.
+# seq_len() keeps the loop empty when cl has fewer than 4 columns, whereas
+# 4:ncol(cl) would count downwards.
+for (col_idx in seq_len(ncol(cl))[-(1:3)]) {
+  month <- names(cl)[col_idx]
+
+  # Conditions to fulfill, evaluated for all countries at once
+  status1 <- cl[[col_idx]] > 3 * cl[[col_idx - 1]]
+  status2 <- cl[[col_idx]] > 50
+
+  # which() skips rows whose comparison is NA instead of aborting on them
+  for (row_idx in which(status1 & status2)) {
+    no <- no + 1
+    cat("[",no,"] ",as.character(cl$code[row_idx]),": 3x m-1 && >50 in: ", month,"\n", sep = "")
+  }
+}
+# Remove only those helpers that were actually created
+rm(list = intersect(c("no", "col_idx", "row_idx", "month", "status1", "status2"), ls()))
+
+
+# Country was mentioned in one month more than 3x as often as its average for
+# that year, and more than 50x
+no <- 0
+# seq_len() keeps the loop empty when cl has fewer than 3 columns, whereas
+# 3:ncol(cl) would count downwards
+for (col_idx in seq_len(ncol(cl))[-(1:2)]) {
+  month <- names(cl)[col_idx]
+  year <- str_extract(month, "\\d{4}")
+  # Averages of all countries for that year, read from cl_stats; rows of
+  # cl_stats are addressed by the same index as rows of cl
+  averg <- cl_stats[, str_c(year,"-averg")]
+
+  # Conditions to fulfill, evaluated for all countries at once
+  status1 <- cl[[col_idx]] > 3 * averg
+  status2 <- cl[[col_idx]] > 50
+
+  # which() skips rows whose comparison is NA instead of aborting on them
+  for (row_idx in which(status1 & status2)) {
+    no <- no + 1
+    cat("[",no,"] ",as.character(cl$code[row_idx]),": 3x year average && >50 ", month,"\n", sep = "")
+  }
+}
+# Remove only those helpers that were actually created
+rm(list = intersect(c("no", "col_idx", "row_idx", "month", "year", "averg",
+                      "status1", "status2"), ls()))
+
+# Final method: country was mentioned in one month more often than in all 3
+# months before combined, and more than 50x.
+# Every hit is recorded as its own row in cl_supfoc_mon (code, name and a 1 in
+# the column of the month).
+cl_supfoc_mon <- data.frame(code=NA,name=NA)
+cl_supfoc_mon[sprintf("%s", tspans)] <- 0
+no <- 0
+# Column 6 is the first one inspected, because each inspected column needs
+# three predecessors. seq_len() keeps the loop empty when cl has fewer than
+# 6 columns, whereas 6:ncol(cl) would count downwards.
+for (col_idx in seq_len(ncol(cl))[-(1:5)]) {
+  month <- names(cl)[col_idx]
+
+  # Conditions to fulfill, evaluated for all countries at once
+  status1 <- cl[[col_idx]] >
+    cl[[col_idx - 1]] + cl[[col_idx - 2]] + cl[[col_idx - 3]]
+  status2 <- cl[[col_idx]] > 50
+
+  # which() skips rows whose comparison is NA instead of aborting on them
+  for (row_idx in which(status1 & status2)) {
+    no <- no + 1
+    # Writing to row `no` appends a new row; its untouched cells stay NA
+    cl_supfoc_mon[no, "code"] <- as.character(cl$code[row_idx])
+    cl_supfoc_mon$name[no] <- as.character(cl$name[row_idx])
+    #if (is.null(cl_supfoc_mon[no, month])) { cl_supfoc_mon[no, month] <- 0}
+    #cl_supfoc_mon[no, month] <- cl_supfoc_mon[no, month] + 1
+    cl_supfoc_mon[no, month] <- 1
+    cat("[",no,"] ",as.character(cl$code[row_idx]),": >m-(1:3) && >50 ", month,"\n", sep = "")
+  }
+}
+# Remove only those helpers that were actually created
+rm(list = intersect(c("no", "col_idx", "row_idx", "month", "status1", "status2"), ls()))
+
+# Clean cl_supfoc_mon: cells that are still NA become 0, then rows sharing the
+# same code and name are collapsed by summing their numeric columns
+cl_supfoc_mon <- replace(cl_supfoc_mon, is.na(cl_supfoc_mon), 0)
+cl_supfoc_mon <- ddply(.data = cl_supfoc_mon,
+                       .variables = c("code", "name"),
+                       .fun = numcolwise(sum))
+
+# Delete all month-columns with 0 surprising events (columns 3 to the last
+# one are handed to removeZeroMonths)
+cl_supfoc_only_mon <- removeZeroMonths(cl_supfoc_mon, 3, ncol(cl_supfoc_mon))
+
+
+
+
+# Get total surprising newsfocuses for each country: code and name are taken
+# over as character, "total" is the row-wise sum of every column from the
+# third one onwards
+event_cols <- cl_supfoc_mon[seq_len(ncol(cl_supfoc_mon))[-(1:2)]]
+cl_supfoc_total <- data.frame(
+  code = as.character(cl_supfoc_mon$code),
+  name = as.character(cl_supfoc_mon$name),
+  # Start from 0 for every row and add one whole column at a time
+  total = Reduce("+", lapply(event_cols, as.numeric),
+                 numeric(nrow(cl_supfoc_mon))),
+  stringsAsFactors = FALSE
+)
+rm(event_cols)
+
+
+
+
+# Total highlights per month + turn around table for graphs: one row per
+# numeric column of cl_supfoc_mon, holding the sum of that column.
+# The sums are taken directly, so cl_supfoc_mon no longer needs a temporary
+# constant "highlight" grouping column.
+numeric_cols <- vapply(cl_supfoc_mon, is.numeric, logical(1))
+highs <- colSums(cl_supfoc_mon[, numeric_cols, drop = FALSE])
+cl_supfoc_turn_mon <- data.frame(month = names(highs), highs = as.numeric(highs))
+# Convert the tspans labels to a valid date class: "15." is prepended and the
+# result is parsed as %d.%m.%Y. The length of tspans has to fit the number of
+# rows built above.
+cl_supfoc_turn_mon$month <- as.Date(str_c("15.", tspans), format = "%d.%m.%Y")
+rm(numeric_cols, highs)
+
+# Keep only the month-rows whose number of surprising events differs from 0,
+# then renumber the rows from 1
+cl_supfoc_turn_only_mon <- cl_supfoc_turn_mon[cl_supfoc_turn_mon$highs != 0, ]
+row.names(cl_supfoc_turn_only_mon) <- NULL
+
+
+
+
+
+
+# # Replace 0s by NAs
+# for (r in 1:180) {
+#   if (! is.na(cl_total2$highs[r])) {
+#     if (cl_total2$highs[r] == 0) {
+#       cl_total2$highs[r] <- NA
+#     }
+#   }
+# }
+