వాడుకరి:Arjunaraoc/R Scripts/pageview trends yearly
Jump to navigation
Jump to search
## Get the most viewed pages of a Wikimedia project for one calendar year,
## using the Wikimedia REST API "top" page-views endpoint.
##
## One request is made per month, using the URL layout
##   <urlprefix><prj>/<atype>/<yyyy>/<mm>/all-days
## Example of the endpoint family this is based on:
##   "https://wikimedia.org/api/rest_v1/metrics/pageviews/top/te.wikipedia.org/all-access/2018/03/01"
library(jsonlite)
library(lubridate)

## pyy_views: yearly top page views, expressed as average views per day.
##
## Arguments:
##   prj    project host name used in the request URL; for Telugu Wikipedia
##          this is "te.wikipedia.org".
##   atype  access segment of the request URL (default "all-access").
##   y      year; only the first four characters are used ("2017").
##
## Returns a data frame with one row per article that appeared in at least one
## of the twelve monthly lists, sorted by descending views, with columns:
##   article  page title as returned by the API
##   views    sum of the monthly views divided by the number of days in the
##            year, rounded to a whole number
##   rank     1 for the most viewed article; ties share the best rank
##
## Only months in which an article made it into the monthly list contribute
## to its total, so `views` is a lower bound on the real daily average.
##
## Side effects: prints a progress marker and the URL of every request.
## Errors raised by fromJSON() (e.g. a month with no data) are not caught.
pyy_views <- function(prj = "te.wikipedia.org", atype = "all-access",
                      y = "2017") {
  urlprefix <- "https://wikimedia.org/api/rest_v1/metrics/pageviews/top/"

  y <- substr(y, 1, 4)
  if (!grepl("^[0-9]{4}$", y)) {
    stop("`y` must start with a four-digit year, e.g. \"2017\"", call. = FALSE)
  }
  daysinyear <- if (leap_year(as.numeric(y))) 366 else 365

  # Fetch the twelve monthly lists first and combine them afterwards, instead
  # of growing a data frame with rbind() on every iteration.
  monthly <- lapply(seq_len(12L), function(m) {
    url <- paste0(
      urlprefix, prj, "/", atype, "/", y, "/", sprintf("%02d", m), "/all-days"
    )
    cat("..", m)
    print(url)
    pv <- fromJSON(url, flatten = TRUE)
    pv$items$articles[[1]]
  })
  all_months <- do.call(rbind, monthly)

  if (is.null(all_months) || nrow(all_months) == 0) {
    return(data.frame(
      article = character(),
      views = numeric(),
      rank = integer(),
      stringsAsFactors = FALSE
    ))
  }

  # Sum as double: yearly totals of popular pages can exceed the integer
  # range, which would silently turn them into NA.
  all_months$views <- as.numeric(all_months$views)
  totals <- rowsum(all_months$views, all_months$article, reorder = FALSE)

  # Keep the first row seen for each article (first-appearance order decides
  # how ties are ordered after sorting) and replace its views by the total.
  top <- all_months[!duplicated(all_months$article), , drop = FALSE]
  top$views <- unname(totals[top$article, 1])

  top <- top[order(-top$views), , drop = FALSE]
  # Competition ranking on the yearly totals: rank 1 is the most viewed.
  top$rank <- rank(-top$views, ties.method = "min")
  top$views <- round(top$views / daysinyear, 0)
  row.names(top) <- NULL
  top
}

## Arguments are a list consisting of parameters of top views and then a
## sublist of articles in a dataframe,
## with article, views, rank.
##
## pv_trends: compare a later page-view table with an earlier one.
##
## Arguments:
##   latd  data frame for the later period, with columns article, views, rank
##   eard  data frame for the earlier period, with the same columns
##
## Returns `latd` restricted to the articles that also occur in `eard`, with
## the row names reset and three extra columns:
##   rankchange    latd$rank - eard$rank
##   viewschangep  change in views relative to the earlier views, in percent,
##                 rounded to two decimals
##   pviews        views of the article in the earlier period
##
## With rank 1 meaning "most viewed", a negative rankchange means the article
## climbed in the later period.
## NOTE(review): the original comment described "+" as more popularity, which
## is the opposite sign of what this arithmetic produces for rank - confirm
## which convention is intended before changing it.
##
## Rows whose rankchange is NA (article missing from `eard`, or an NA rank on
## either side) are dropped. An earlier view count of 0 gives Inf or NaN in
## viewschangep.
pv_trends <- function(latd, eard) {
  # Position of every later article in the earlier table (NA if absent);
  # indexing with NA yields NA, so unmatched rows get NA in all new columns.
  idx <- match(latd$article, eard$article)
  earlier_rank <- eard$rank[idx]
  earlier_views <- eard$views[idx]

  latd$rankchange <- latd$rank - earlier_rank
  latd$viewschangep <- round(
    (latd$views - earlier_views) / earlier_views * 100, 2
  )
  latd$pviews <- earlier_views

  latd <- latd[!is.na(latd$rankchange), , drop = FALSE]
  row.names(latd) <- NULL
  latd
}