Commit 703cad64 authored by maximelenormand's avatar maximelenormand

First update

parents
ID,Name,Scholar,Group,W
0,Maxime Lenormand,VSyM8fEAAAAJ,1,1
1,Jose J. Ramasco,aEdK_vsAAAAJ,1,1
2,Thomas Louail,pZAtLJMAAAAJ,1,1
3,Marc Barthelemy,2UTIVeIAAAAJ,1,1
4,Oliva García Cantú Ros,p3dQlwgAAAAJ,1,1
5,Ricardo Herranz,wbmZMFcAAAAJ,1,1
6,Enrique Frias-Martinez,R3GAd3sAAAAJ,1,1
7,Sylvie Huet,VO7AEbcAAAAJ,1,1
8,Guillaume Deffuant,xwYTdNwAAAAJ,1,1
9,Antonia Tugores,JxoxIlYAAAAJ,1,1
10,Floriana Gargiulo,L93QaKYAAAAJ,1,1
11,Maxi San Miguel,AcDm6B0AAAAJ,1,1
12,Juan Murillo Arias,Aiqh23MAAAAJ,1,1
13,Bruno Gonçalves,rtKaL18AAAAJ,1,1
14,Aleix Bassolas,tXaOGyUAAAAJ,1,1
15,Franck Jabot,4YCAAPYAAAAJ,1,1
16,Ronaldo Menezes,eHej2g8AAAAJ,1,1
17,Hugo S. Barbosa,dsBgmWgAAAAJ,1,1
18,Gourab Ghoshal,_CNYi6MAAAAJ,1,1
19,Marcello Tomasini,QM5kA_MAAAAJ,1,1
20,Filippo Simini,7cf56rIAAAAJ,1,1
21,Pere Colet,7BDoa9YAAAAJ,1,1
22,Thibaut Dubernet,oP68X_gAAAAJ,1,1
23,Tomás Ruiz,brVkRxcAAAAJ,1,1
24,Maria Henar Salas Olmedo,F6Y9PjoAAAAJ,1,1
25,Fabio Lamanna,QrF76o0AAAAJ,1,1
26,Gustavo Romanillos Arroyo,eEEgIjIAAAAJ,1,1
27,Sandra Luque,GtJ4HDAAAAAJ,1,1
28,Guillaume Papuga,NFHREtgAAAAJ,1,1
29,Pablo Fleurquin,62yXZkoAAAAJ,1,1
30,Victor M. Eguiluz,QVPRrksAAAAJ,1,1
31,Ulrike Tappeiner,lZI5BBIAAAAJ,1,1
32,Johannes Langemeyer,zOoWBVkAAAAJ,1,1
33,Juraj Lieskovsky,r5ACMHIAAAAJ,1,1
34,Uta Schirpke,JTMWeZUAAAAJ,1,1
35,Inge Aalders,iYztnbEAAAAJ,1,1
36,Giuca Relu Constantin,GKCZmfQAAAAJ,1,1
37,Leena Kopperoinen,E6hgxfwAAAAJ,1,1
38,Eszter Lelleiné Kovács,LwxjifkAAAAJ,1,1
39,Serban Chivulescu,E_I7icwAAAAJ,1,1
library(networkD3)
# Working directory containing net.csv and co.csv.
# Leave empty to stay in the current working directory (setwd("") is an error).
wd <- ""
if (nzchar(wd)) {
  setwd(wd)
}
# Load data
# net.csv: one row per link with columns source, target, value
# co.csv: one row per node with columns name, group, size
net <- read.csv2("net.csv", stringsAsFactors = FALSE)
net <- net[rev(seq_len(nrow(net))), ] # Reverse link order to display links with the central node on the top
co <- read.csv2("co.csv", stringsAsFactors = FALSE)
# Color edges
# Links attached to the central node are drawn in blue, all the others in light grey.
# The central node is the first row of co, i.e. the zero-based id 0 in the links table.
# Testing the ids (rather than assuming the position of the rows) stays correct
# when some links with the central node are absent from net.csv.
colo <- rep("lightgrey", nrow(net))
colo[net$source == 0 | net$target == 0] <- "#1F77B4"
# Plot
G <- forceNetwork(
  Links = net, Nodes = co,
  Source = "source", Target = "target", # columns of net holding the zero-based ids of the two ends of a link
  NodeID = "name", Group = "group",
  # Custom nodes and labels
  Nodesize = "size", # column name that gives the size of nodes
  radiusCalculation = JS("d.nodesize/2+5"), # How to use this column to calculate radius of nodes? (Java script expression)
  opacity = 1, # Opacity of the graph elements
  opacityNoHover = 0, # Opacity of the node labels when the mouse is not hovering over them
  colourScale = JS("d3.scaleOrdinal(d3.schemeCategory20);"), # Javascript expression, schemeCategory10 and schemeCategory20 work
  fontSize = 20, # Font size of labels
  fontFamily = "sans serif", # Font family for labels. NOTE(review): the CSS generic family is spelled "sans-serif" - confirm the intended font
  # Custom edges
  Value = "value", # column of net that gives the weight of links
  arrows = FALSE, # Add arrows?
  linkColour = colo, # colour of edges (one colour per row of net)
  linkWidth = JS("function(d) { return Math.sqrt(d.value); }"), # edges width
  # Layout
  linkDistance = 100, # link size, if higher, more space between nodes
  charge = -30, # if highly negative, more space between nodes
  # General parameters
  height = NULL, # height of frame area in pixels
  width = NULL, # width of frame area in pixels
  zoom = FALSE, # Can you zoom on the figure
  legend = FALSE, # add a legend?
  bounded = TRUE, # Keep the nodes inside the frame area
  clickAction = NULL # No custom action when a node is clicked
)
# Export
htmlwidgets::saveWidget(G, "Coauthorship.html")
library(scholar)
library(vecsets)
# Working directory containing Coauthors.csv.
# Leave empty to stay in the current working directory (setwd("") is an error).
wd <- ""
if (nzchar(wd)) {
  setwd(wd)
}
# Load data
co <- read.csv("Coauthors.csv", stringsAsFactors = FALSE) # Five columns:
# - ID: Unique integer node id. The first node is the central node around which the network is built.
# - Name: Full name of the author as it will appear on the final network
# - Scholar: Google Scholar id
# - Group: You can define different groups of coauthors displayed in different colors
# - W: Weight of the node that will be used to set the size of the circles.
# We will update the value according to the number of publis with the central node.
nco <- nrow(co) # Number of nodes
# Load functions
simat <- function(li, lj) {
  # Return a matrix of similarities between two vectors of character strings li and lj
  #
  # Arguments:
  #   li, lj: vectors of character strings (compared case-insensitively)
  # Value:
  #   A length(li) x length(lj) numeric matrix. The element ij is the fraction of
  #   letters in common between the ith string of li and the jth string of lj:
  #   2 * (number of characters returned by vintersect) / (total number of characters).
  #   An empty li or lj gives a matrix with zero rows or zero columns.

  # Lower-case, split and measure every string once instead of once per pair
  low_i <- tolower(li)
  low_j <- tolower(lj)
  chars_i <- strsplit(low_i, "")
  chars_j <- strsplit(low_j, "")
  len_i <- nchar(low_i)
  len_j <- nchar(low_j)

  res <- matrix(0, length(li), length(lj))
  # seq_along() (unlike 1:length()) does not iterate when a vector is empty
  for (i in seq_along(li)) {
    for (j in seq_along(lj)) {
      shared <- length(vintersect(chars_i[[i]], chars_j[[j]]))
      res[i, j] <- 2 * shared / (len_i[i] + len_j[j]) # Similarity metric
    }
  }
  res
}
duplipubli <- function(li, threshold) {
  # Remove duplicated entries from the vector of character strings li
  #
  # Arguments:
  #   li: vector of character strings
  #   threshold: two strings are considered as duplicates when their similarity
  #              (as computed by simat) is strictly higher than this value
  # Value:
  #   li without the duplicated entries; the first occurrence of each group of
  #   duplicates is kept and the original order is preserved.
  n <- length(li)
  if (n < 2) { # Nothing to compare
    return(li)
  }
  res <- simat(li, li) # Compute similarity matrix to identify potential duplicated values
  # Scan the strings in order: every string still kept discards all the later
  # strings that are too similar to it. Working with a mask on the full matrix
  # (instead of shrinking li and res on the fly) guarantees that every group of
  # doublons is processed, whatever the number of entries already removed.
  keep <- rep(TRUE, n)
  for (i in seq_len(n - 1)) {
    if (keep[i]) {
      later <- seq(i + 1, n)
      doublons <- later[which(res[i, later] > threshold)] # which() ignores undefined similarities
      keep[doublons] <- FALSE
    }
  }
  li[keep]
}
intersectpubli <- function(li, lj, threshold) {
  # Return the number of articles in common between two vectors of character strings li and lj
  #
  # Arguments:
  #   li, lj: vectors of article titles
  #   threshold: two titles are considered as the same article when their
  #              similarity (as computed by simat) is strictly higher than this value
  # Value:
  #   The number of pairs (one title of li, one title of lj) identified as the same article.
  if (length(li) == 0 || length(lj) == 0) { # No article on one side: nothing in common
    return(0L)
  }
  sum(simat(li, lj) > threshold)
}
# Build network
stopifnot(nco >= 2) # At least the central node and one coauthor are needed
# Extract the list of articles of every scholar once (one request per scholar
# instead of one request per pair of scholars)
publis <- vector("list", nco)
for (i in seq_len(nco)) {
  pub <- get_publications(co[i, 3], cstart = 0, pagesize = 100, flush = FALSE)
  pub <- pub[!is.na(pub$year), ] # Remove articles without publication year
  publis[[i]] <- duplipubli(as.character(pub$title), threshold = 0.95) # Remove doublons
  Sys.sleep(1) # Pause between two requests
}
# One row (id of scholar i, id of scholar j, number of articles in common) per pair i < j.
# The rows are ordered by i then j, so the first nco-1 rows are the links of the central node.
net <- matrix(0L, nrow = nco * (nco - 1) / 2, ncol = 3)
k <- 0
for (i in seq_len(nco - 1)) { # Up to nco-1 so that the last pair (nco-1, nco) is included
  for (j in (i + 1):nco) {
    k <- k + 1
    # Add the number of articles in common between scholar i and j to the network
    net[k, ] <- c(co[i, 1], co[j, 1], intersectpubli(publis[[i]], publis[[j]], threshold = 0.95))
  }
}
# Set node weight W according to the number of publis in common with the central node
co[1, 5] <- get_num_articles(co[1, 3]) # If central node: Number of publis of central node
co[2:nco, 5] <- net[seq_len(nco - 1), 3] # Otherwise: Number of publis in common between the central node and the others
# Clean network for networkD3
co <- co[, c(2, 4, 5)]
colnames(co) <- c("name", "group", "size")
net <- net[net[, 3] > 0, , drop = FALSE] # Keep only the pairs with at least one article in common (and keep a matrix even with a single link)
colnames(net) <- c("source", "target", "value")
# Export network
write.csv2(co, "co.csv", row.names = FALSE, fileEncoding = "UTF-8")
write.csv2(net, "net.csv", row.names = FALSE, fileEncoding = "UTF-8")
This diff is collapsed.
Building a coauthorship network with R
===================================================================================
This repository contains two scripts written in R for the creation and visualization of coauthorship networks. It focuses on an ego network centered around one scholar (me in the example). The nodes of the network are my coauthors (people with whom I published at least one paper), and the link between two coauthors is weighted by the number of papers they cosigned (if any). **ExtractNetwork** first scrapes data from Google Scholar using the R package **scholar** to build the network, and **DrawNetwork** then relies on the package **networkD3** to visualize it.
If you want more details feel free to visit [this post](http://www.maximelenormand.com/Blog/coauthorship).
If you need help, find a bug, or want to give me advice or feedback, please contact me!
You can reach me at maxime.lenormand[at]irstea.fr
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment