Commit 1cc92bf2 authored by Andrew Newdigate's avatar Andrew Newdigate

🚧 WIP: various data queries

parent 502d2d2d
......@@ -108,3 +108,6 @@ modules/topic-notifications/test/comment.html
modules/topic-notifications/test/reply.html
modules/topic-notifications/test/topic.html
/.gtm/
/scripts/useful-queries/*.csv
/scripts/useful-queries/*.numbers
/scripts/useful-queries/*.png
......@@ -3,6 +3,8 @@
"mongo": true
},
"rules": {
"strict": "off"
"strict": "off",
"no-undef": "off",
"no-unused-vars": "off"
}
}
'use strict';

/**
 * Mongo shell maintenance script: adds keyword-derived tags to rooms.
 *
 * AUTO_TAGS maps a tag name to the case-insensitive pattern that is matched
 * against a room's `topic`. For every entry, each document in `db.troupes`
 * whose topic matches gets the tag added to its `tags` array via $addToSet
 * (so re-running the script does not duplicate tags).
 *
 * NOTE(review): the single-letter patterns ('r', 'c', 'd') are very loose --
 * e.g. /\bc\b/ also matches the "c" in "c++" or "c#", and /\b(d)\b/ matches
 * "I'd". They are left as-is here; tighten them if false positives matter.
 */
var AUTO_TAGS = {
  'lua': /\blua\b/i,
  'c#': /c#|csharp/i,
  'c++': /(c\+\+|cplusplus|\bcpp\b)/i,
  'swift': /\bswift\b/i,
  'jquery': /\bjquery\b/i,
  'objective-c': /obj(ective)?-?c/i,
  'ios': /\bios\b/i,
  'ruby': /\bruby\b/i,
  'r': /\br\b/i,
  'scala': /\b(scala|sbt|akka)\b/i,
  'julia': /\b(julia|julialang)\b/i,
  'go': /\b(golang|go)\b/i,
  'rust': /\b(rust|crate|rustlang)\b/i,
  'c': /\bc\b/i,
  'shell': /\b(shell|bash|zsh)\b/i,
  'perl': /\b(perl)\b/i,
  'groovy': /\b(groovy|grails)\b/i,
  'd': /\b(d)\b/i,
  // The dot must be escaped: the previous `.net` alternative matched ANY
  // character followed by "net" ("inet", "vnet", "a net gain"). A literal
  // ".net" is accepted at the start of the topic, after a non-word character,
  // or as part of "asp.net".
  'dotnet': /\b(dotnet|wpf|aspnet|asp\.net)\b|(^|\W)\.net\b/i,
  'aws': /\b(aws|ec2|s3)\b/i,
  'spark': /\b(spark)\b/i,
  'elasticsearch': /\b(elasticsearch)\b/i,
  'powershell': /\b(powershell)\b/i,
  'typescript': /\b(typescript)\b/i,
  'clojure': /\b(clojure)\b/i,
  'haskell': /\b(haskell)\b/i,
  'erlang': /\b(erlang)\b/i,
  'akka': /\b(akka)\b/i,
  'javascript': /\b(js|javascript)\b/i,
  'dart': /\b(dart)\b/i,
  'angularjs': /\b(angularjs|angular)\b/i,
  'ethereum': /\b(ethereum)\b/i,
  'mongodb': /\b(mongodb)\b/i,
  'ansible': /\b(ansible)\b/i
};

Object.keys(AUTO_TAGS).forEach(function(tag) {
  var re = AUTO_TAGS[tag];

  // Progress output: one line per tag being applied.
  print(tag);

  db.troupes.update(
    { topic: re },
    { $addToSet: { tags: tag } },
    { multi: true } // update every matching room, not just the first
  );
});
// Allow this shell connection to read from a secondary replica-set member.
rs.slaveOk()
// Start of the reporting window: 30 days (86400000 ms per day) before now,
// expressed in epoch milliseconds.
var horizonTimestamp = Date.now() - 86400000 * 30;
/**
 * Builds an ObjectId from a point in time, for use as a bound in `_id`
 * range queries.
 *
 * The id's hex string is the whole seconds of `timestamp`, written in
 * hexadecimal and left-padded with zeros to at least 8 digits, followed by
 * 16 zero digits.
 *
 * @param {number|Date} timestamp - epoch milliseconds (anything that
 *   coerces to a number under division works).
 * @returns {ObjectId} the value returned by the shell's ObjectId().
 */
function createIdForTimestampString(timestamp) {
  var seconds = Math.floor(timestamp / 1000);
  var hex = seconds.toString(16);

  // Number of leading zeros needed to reach 8 hex digits (none if already
  // 8 or longer; longer values are passed through untouched).
  var missing = 8 - hex.length;
  var padding = missing > 0 ? new Array(missing + 1).join("0") : "";

  return ObjectId(padding + hex + "0000000000000000");
}
// Room activity report. Prints CSV to stdout: a header line followed by one
// line per room with its uri, public flag, number of distinct active days,
// message count, distinct sender count and the room's stored userCount.

// Only messages whose _id is above the horizon id and whose `sent` is a date.
var recentMessages = {
  $match: {
    _id: { $gt: createIdForTimestampString(horizonTimestamp) },
    sent: { $type: 'date' }
  }
};

// Per room: set of day-of-year values, set of senders, message count.
var collectPerRoom = {
  $group: {
    _id: '$toTroupeId',
    days: { $addToSet: { $dayOfYear: '$sent' } },
    users: { $addToSet: '$fromUserId' },
    count: { $sum: 1 }
  }
};

// Replace the sets by their sizes.
var countPerRoom = {
  $project: {
    roomId: '$_id',
    days: { $size: '$days' },
    uniqueUserCount: { $size: '$users' },
    count: '$count'
  }
};

var busiestFirst = { $sort: { count: -1 } };

// The limit is applied BEFORE the sender-count filter below, so fewer than
// 1000 rooms may be reported.
var topThousand = { $limit: 1000 };

var moreThanThreeSenders = {
  $match: {
    uniqueUserCount: { $gt: 3 }
  }
};

// Attach the room document; $unwind drops rooms without a match.
var joinRoom = {
  $lookup: {
    from: 'troupes',
    localField: 'roomId',
    foreignField: '_id',
    as: 'troupe'
  }
};
var flattenRoom = { $unwind: '$troupe' };

var reportShape = {
  $project: {
    roomId: '$_id',
    uri: '$troupe.uri',
    public: '$troupe.sd.public',
    totalUsers: '$troupe.userCount',
    activeUsers: '$uniqueUserCount',
    activeDays: '$days',
    totalMessages: '$count'
  }
};

var a = db.chatmessages.aggregate([
  recentMessages,
  collectPerRoom,
  countPerRoom,
  busiestFirst,
  topThousand,
  moreThanThreeSenders,
  joinRoom,
  flattenRoom,
  reportShape
]);

var REPORT_FIELDS = ['uri', 'public', 'activeDays', 'totalMessages', 'activeUsers', 'totalUsers'];

print(REPORT_FIELDS.join(','));

a.forEach(function(room) {
  var cells = REPORT_FIELDS.map(function(field) {
    // '' + value coerces exactly like plain string concatenation does
    // (missing fields print as "undefined").
    return '' + room[field];
  });
  print(cells.join(','));
});
// Allow this shell connection to read from a secondary replica-set member.
rs.slaveOk()
// Start of the reporting window: 30 days (86400000 ms per day) before now,
// expressed in epoch milliseconds.
var horizonTimestamp = Date.now() - 86400000 * 30;
/**
 * Builds an ObjectId from a point in time, for use as a bound in `_id`
 * range queries.
 *
 * The id's hex string is the whole seconds of `timestamp`, written in
 * hexadecimal and left-padded with zeros to at least 8 digits, followed by
 * 16 zero digits.
 *
 * @param {number|Date} timestamp - epoch milliseconds (anything that
 *   coerces to a number under division works).
 * @returns {ObjectId} the value returned by the shell's ObjectId().
 */
function createIdForTimestampString(timestamp) {
  var seconds = Math.floor(timestamp / 1000);
  var hex = seconds.toString(16);

  // Number of leading zeros needed to reach 8 hex digits (none if already
  // 8 or longer; longer values are passed through untouched).
  var missing = 8 - hex.length;
  var padding = missing > 0 ? new Array(missing + 1).join("0") : "";

  return ObjectId(padding + hex + "0000000000000000");
}
// Per-user activity report. For every sender with messages above the horizon
// id, prints one CSV line: the user id, the number of distinct rooms written
// to, the number of distinct day-of-year values and the message count.
var a = db.chatmessages.aggregate([{
  // Only messages whose _id is above the horizon id and whose `sent` is a date.
  $match: {
    _id: { $gt: createIdForTimestampString(horizonTimestamp) },
    sent: { $type: 'date' }
  }
}, {
  // Per sender: set of rooms, message count, set of active days.
  $group: {
    _id: '$fromUserId',
    rooms: { $addToSet: "$toTroupeId" },
    totalMessages: { $sum: 1 },
    days: { $addToSet: { $dayOfYear: "$sent" } }
  }
}, {
  // Replace the sets by their sizes.
  $project: {
    userId: '$_id',
    activeRooms: { $size: '$rooms' },
    activeDays: { $size: '$days' },
    totalMessages: '$totalMessages'
  }
}]);

// The header must name the columns in the order they are printed below; the
// first column is the user id (it was previously mislabelled "activeRooms",
// giving two columns with the same name).
print('userId,activeRooms,activeDays,totalMessages');

a.forEach(function(i) {
  print(i.userId + ',' + i.activeRooms + ',' + i.activeDays + ',' + i.totalMessages);
});
// Allow this shell connection to read from a secondary replica-set member.
rs.slaveOk()
// Start of the reporting window: 30 days (86400000 ms per day) before now,
// expressed in epoch milliseconds.
var horizonTimestamp = Date.now() - 86400000 * 30;
/**
 * Builds an ObjectId from a point in time, for use as a bound in `_id`
 * range queries.
 *
 * The id's hex string is the whole seconds of `timestamp`, written in
 * hexadecimal and left-padded with zeros to at least 8 digits, followed by
 * 16 zero digits.
 *
 * @param {number|Date} timestamp - epoch milliseconds (anything that
 *   coerces to a number under division works).
 * @returns {ObjectId} the value returned by the shell's ObjectId().
 */
function createIdForTimestampString(timestamp) {
  var seconds = Math.floor(timestamp / 1000);
  var hex = seconds.toString(16);

  // Number of leading zeros needed to reach 8 hex digits (none if already
  // 8 or longer; longer values are passed through untouched).
  var missing = 8 - hex.length;
  var padding = missing > 0 ? new Array(missing + 1).join("0") : "";

  return ObjectId(padding + hex + "0000000000000000");
}
// Histogram of senders by (distinct rooms written to, distinct active days).
// Prints CSV to stdout: activeRooms, activeDays and the number of users that
// fall into that combination, ordered by activeDays then activeRooms, both
// descending.
var histogramPipeline = [
  // Only messages whose _id is above the horizon id and whose `sent` is a date.
  {
    $match: {
      _id: { $gt: createIdForTimestampString(horizonTimestamp) },
      sent: { $type: 'date' }
    }
  },
  // Per sender: set of rooms and set of day-of-year values.
  {
    $group: {
      _id: '$fromUserId',
      rooms: { $addToSet: '$toTroupeId' },
      days: { $addToSet: { $dayOfYear: '$sent' } }
    }
  },
  // Replace the sets by their sizes.
  {
    $project: {
      userId: '$_id',
      activeRooms: { $size: '$rooms' },
      activeDays: { $size: '$days' }
    }
  },
  // Count the users sharing each (activeRooms, activeDays) pair.
  {
    $group: {
      _id: {
        activeRooms: '$activeRooms',
        activeDays: '$activeDays'
      },
      count: { $sum: 1 }
    }
  },
  // Lift the pair out of the compound _id.
  {
    $project: {
      activeRooms: '$_id.activeRooms',
      activeDays: '$_id.activeDays',
      count: '$count'
    }
  },
  {
    $sort: {
      activeDays: -1,
      activeRooms: -1
    }
  }
];

var a = db.chatmessages.aggregate(histogramPipeline);

print(['activeRooms', 'activeDays', 'count'].join(','));

a.forEach(function(bucket) {
  var line = '' + bucket.activeRooms;
  line += ',' + bucket.activeDays;
  line += ',' + bucket.count;
  print(line);
});
// Allow this shell connection to read from a secondary replica-set member.
rs.slaveOk()
// Start of the reporting window: 30 days (86400000 ms per day) before now,
// expressed in epoch milliseconds.
var horizonTimestamp = Date.now() - 86400000 * 30;
/**
 * Builds an ObjectId from a point in time, for use as a bound in `_id`
 * range queries.
 *
 * The id's hex string is the whole seconds of `timestamp`, written in
 * hexadecimal and left-padded with zeros to at least 8 digits, followed by
 * 16 zero digits.
 *
 * @param {number|Date} timestamp - epoch milliseconds (anything that
 *   coerces to a number under division works).
 * @returns {ObjectId} the value returned by the shell's ObjectId().
 */
function createIdForTimestampString(timestamp) {
  var seconds = Math.floor(timestamp / 1000);
  var hex = seconds.toString(16);

  // Number of leading zeros needed to reach 8 hex digits (none if already
  // 8 or longer; longer values are passed through untouched).
  var missing = 8 - hex.length;
  var padding = missing > 0 ? new Array(missing + 1).join("0") : "";

  return ObjectId(padding + hex + "0000000000000000");
}
// Average number of active days, grouped by how many distinct rooms a sender
// wrote to. Prints CSV to stdout (activeRooms, avgActiveDays), ordered by
// activeRooms ascending.
var avgDaysPipeline = [
  // Only messages whose _id is above the horizon id and whose `sent` is a date.
  {
    $match: {
      _id: { $gt: createIdForTimestampString(horizonTimestamp) },
      sent: { $type: 'date' }
    }
  },
  // Per sender: set of rooms and set of day-of-year values.
  {
    $group: {
      _id: '$fromUserId',
      rooms: { $addToSet: '$toTroupeId' },
      days: { $addToSet: { $dayOfYear: '$sent' } }
    }
  },
  // Replace the sets by their sizes.
  {
    $project: {
      userId: '$_id',
      activeRooms: { $size: '$rooms' },
      activeDays: { $size: '$days' }
    }
  },
  // Average the active-day count over all users with the same room count.
  {
    $group: {
      _id: '$activeRooms',
      avgActiveDays: { $avg: '$activeDays' }
    }
  },
  {
    $project: {
      activeRooms: '$_id',
      avgActiveDays: '$avgActiveDays'
    }
  },
  {
    $sort: {
      activeRooms: 1
    }
  }
];

var a = db.chatmessages.aggregate(avgDaysPipeline);

print(['activeRooms', 'avgActiveDays'].join(','));

a.forEach(function(bucket) {
  var line = '' + bucket.activeRooms;
  line += ',' + bucket.avgActiveDays;
  print(line);
});
// Allow this shell connection to read from a secondary replica-set member.
rs.slaveOk()
// Start of the reporting window: 8 weeks (7 * 8 days of 86400000 ms each)
// before now, expressed in epoch milliseconds.
var horizonTimestamp = Date.now() - 86400000 * 7 * 8;
/**
 * Builds an ObjectId from a point in time, for use as a bound in `_id`
 * range queries.
 *
 * The id's hex string is the whole seconds of `timestamp`, written in
 * hexadecimal and left-padded with zeros to at least 8 digits, followed by
 * 16 zero digits.
 *
 * @param {number|Date} timestamp - epoch milliseconds (anything that
 *   coerces to a number under division works).
 * @returns {ObjectId} the value returned by the shell's ObjectId().
 */
function createIdForTimestampString(timestamp) {
  var seconds = Math.floor(timestamp / 1000);
  var hex = seconds.toString(16);

  // Number of leading zeros needed to reach 8 hex digits (none if already
  // 8 or longer; longer values are passed through untouched).
  var missing = 8 - hex.length;
  var padding = missing > 0 ? new Array(missing + 1).join("0") : "";

  return ObjectId(padding + hex + "0000000000000000");
}
// Message counts per language and day of week. Prints a CSV table to stdout:
// one column per entry of LISTED_LANGUAGES, one row per $dayOfWeek value
// (1 through 7). Languages not in the list are ignored; combinations without
// messages print 0.
var langByDayPipeline = [
  // Messages above the horizon id that have a date in `sent` and a language.
  {
    $match: {
      _id: { $gt: createIdForTimestampString(horizonTimestamp) },
      sent: { $type: 'date' },
      lang: { $ne: null }
    }
  },
  {
    $group: {
      _id: {
        lang: '$lang',
        dayOfWeek: { $dayOfWeek: '$sent' }
      },
      count: { $sum: 1 }
    }
  },
  // Flatten the compound _id into plain fields.
  {
    $project: {
      _id: 0,
      lang: '$_id.lang',
      dayOfWeek: '$_id.dayOfWeek',
      count: '$count'
    }
  },
  {
    $sort: {
      lang: 1,
      dayOfWeek: 1
    }
  }
];

var a = db.chatmessages.aggregate(langByDayPipeline);

var LISTED_LANGUAGES = [
  'en', 'ru', 'zh', 'fr', 'de', 'zh-Hant', 'es',
  'ja', 'da', 'nl', 'pl', 'el', 'cs', 'no', 'vi',
  'sr', 'ko', 'id', 'sv', 'it', 'sk', 'uk', 'hr',
  'ro', 'lv'
];

// results[lang][dayOfWeek] -> message count; index 0 is left unused because
// day numbers start at 1.
var results = {};
LISTED_LANGUAGES.forEach(function(lang) {
  var perDay = [];
  for (var d = 1; d <= 7; d++) {
    perDay[d] = 0;
  }
  results[lang] = perDay;
});

a.forEach(function(row) {
  var perDay = results[row.lang];
  if (!perDay) return; // language is not one of the listed columns
  perDay[row.dayOfWeek] = row.count;
});

print(['day'].concat(LISTED_LANGUAGES).join(','));

for (var day = 1; day <= 7; day++) {
  var x = [day].concat(LISTED_LANGUAGES.map(function(lang) {
    return results[lang][day];
  }));
  print(x.join(','));
}
// Allow this shell connection to read from a secondary replica-set member.
rs.slaveOk()
// Start of the reporting window: 31 days (86400000 ms per day) before now,
// expressed in epoch milliseconds.
var horizonTimestamp = Date.now() - 86400000 * 31;
/**
 * Builds an ObjectId from a point in time, for use as a bound in `_id`
 * range queries.
 *
 * The id's hex string is the whole seconds of `timestamp`, written in
 * hexadecimal and left-padded with zeros to at least 8 digits, followed by
 * 16 zero digits.
 *
 * @param {number|Date} timestamp - epoch milliseconds (anything that
 *   coerces to a number under division works).
 * @returns {ObjectId} the value returned by the shell's ObjectId().
 */
function createIdForTimestampString(timestamp) {
  var seconds = Math.floor(timestamp / 1000);
  var hex = seconds.toString(16);

  // Number of leading zeros needed to reach 8 hex digits (none if already
  // 8 or longer; longer values are passed through untouched).
  var missing = 8 - hex.length;
  var padding = missing > 0 ? new Array(missing + 1).join("0") : "";

  return ObjectId(padding + hex + "0000000000000000");
}
// Message counts per language and hour of day. Prints a CSV table to stdout:
// one column per entry of LISTED_LANGUAGES, one row per $hour value
// (0 through 23). Languages not in the list are ignored; combinations
// without messages print 0.
var langByHourPipeline = [
  // Messages above the horizon id that have a date in `sent` and a language.
  {
    $match: {
      _id: { $gt: createIdForTimestampString(horizonTimestamp) },
      sent: { $type: 'date' },
      lang: { $ne: null }
    }
  },
  {
    $group: {
      _id: {
        lang: '$lang',
        hour: { $hour: '$sent' }
      },
      count: { $sum: 1 }
    }
  },
  // Flatten the compound _id into plain fields.
  {
    $project: {
      _id: 0,
      lang: '$_id.lang',
      hour: '$_id.hour',
      count: '$count'
    }
  },
  {
    $sort: {
      lang: 1,
      hour: 1
    }
  }
];

var a = db.chatmessages.aggregate(langByHourPipeline);

var LISTED_LANGUAGES = [
  'en', 'ru', 'zh', 'fr', 'de', 'zh-Hant', 'es',
  'ja', 'da', 'nl', 'pl', 'el', 'cs', 'no', 'vi',
  'sr', 'ko', 'id', 'sv', 'it', 'sk', 'uk', 'hr',
  'ro', 'lv'
];

// results[lang][hour] -> message count, pre-filled with 0 for every hour.
var results = {};
LISTED_LANGUAGES.forEach(function(lang) {
  var perHour = [];
  for (var h = 0; h < 24; h++) {
    perHour[h] = 0;
  }
  results[lang] = perHour;
});

a.forEach(function(row) {
  var perHour = results[row.lang];
  if (!perHour) return; // language is not one of the listed columns
  perHour[row.hour] = row.count;
});

print(['hour'].concat(LISTED_LANGUAGES).join(','));

for (var hour = 0; hour < 24; hour++) {
  var x = [hour].concat(LISTED_LANGUAGES.map(function(lang) {
    return results[lang][hour];
  }));
  print(x.join(','));
}
// Allow this shell connection to read from a secondary replica-set member.
rs.slaveOk()
// Presumably defines printCSV, which this script calls at the end -- it is
// not defined in this file; verify against csv.js.
load('./csv.js');
// Length of one comparison period: 30 days in milliseconds.
var period = 86400000 * 30;
var now = Date.now();
// Boundary between the previous and the current period. Kept as a Date
// because the pipeline compares it against the `sent` date field.
var p1 = new Date(now - period);
// Start of the previous period, two periods back. Kept as epoch milliseconds
// because it is only passed to createIdForTimestampString.
var p2 = now - period * 2;
/**
 * Builds an ObjectId from a point in time, for use as a bound in `_id`
 * range queries.
 *
 * The id's hex string is the whole seconds of `timestamp`, written in
 * hexadecimal and left-padded with zeros to at least 8 digits, followed by
 * 16 zero digits.
 *
 * @param {number|Date} timestamp - epoch milliseconds (anything that
 *   coerces to a number under division works).
 * @returns {ObjectId} the value returned by the shell's ObjectId().
 */
function createIdForTimestampString(timestamp) {
  var seconds = Math.floor(timestamp / 1000);
  var hex = seconds.toString(16);

  // Number of leading zeros needed to reach 8 hex digits (none if already
  // 8 or longer; longer values are passed through untouched).
  var missing = 8 - hex.length;
  var padding = missing > 0 ? new Array(missing + 1).join("0") : "";

  return ObjectId(padding + hex + "0000000000000000");
}
// Retention report per room over two consecutive periods split at p1.
//
// Every (user, room) pair with messages above the p2 id is classified:
//   retained - first message before p1 and last message at/after p1
//   lost     - first message before p1 and last message before p1
//   new      - first message at/after p1
// ('impossible' would need a first message at/after p1 with a last message
// before it.) The per-room counts of each class are then joined with the
// room document, one-to-one rooms are dropped, and the result is handed to
// printCSV.

// Picks one of two labels depending on the `current` flag.
function labelByCurrent(whenCurrent, whenNotCurrent) {
  return { $cond: [{ $eq: ['$current', true] }, whenCurrent, whenNotCurrent] };
}

// First element of `v` whose `type` equals the given label (missing if none).
function firstEntryOfType(label) {
  return {
    $arrayElemAt: [{
      $filter: { input: '$v', as: 't', cond: { $eq: ['$$t.type', label] } }
    }, 0]
  };
}

// Value at `path`, or 0 when it is null/missing.
function orZero(path) {
  return { $ifNull: [path, 0] };
}

var retentionPipeline = [
  // Messages above the p2 id that have a date in `sent`.
  {
    $match: {
      _id: { $gt: createIdForTimestampString(p2) },
      sent: { $type: 'date' }
    }
  },
  // First and last message time of every (user, room) pair.
  {
    $group: {
      _id: {
        userId: '$fromUserId',
        troupeId: '$toTroupeId'
      },
      firstSent: { $min: '$sent' },
      lastSent: { $max: '$sent' }
    }
  },
  // initial: active before the boundary; current: active at/after it.
  {
    $project: {
      initial: { $cond: [{ $lt: ['$firstSent', p1] }, true, false] },
      current: { $cond: [{ $gte: ['$lastSent', p1] }, true, false] }
    }
  },
  {
    $project: {
      type: {
        $cond: [
          { $eq: ['$initial', true] },
          labelByCurrent('retained', 'lost'),
          labelByCurrent('new', 'impossible')
        ]
      }
    }
  },
  // Number of pairs per (room, type).
  {
    $group: {
      _id: {
        troupeId: '$_id.troupeId',
        type: '$type'
      },
      count: {
        $sum: 1
      }
    }
  },
  // One document per room carrying all of its {type, count} entries.
  {
    $group: {
      _id: '$_id.troupeId',
      v: {
        $addToSet: {
          type: '$_id.type',
          count: '$count'
        }
      }
    }
  },
  {
    $project: {
      retained: firstEntryOfType('retained'),
      lost: firstEntryOfType('lost'),
      new: firstEntryOfType('new')
    }
  },
  // Reduce each entry to its count, defaulting to 0 when the type is absent.
  {
    $project: {
      retained: orZero('$retained.count'),
      lost: orZero('$lost.count'),
      new: orZero('$new.count')
    }
  },
  // Attach the room document; $unwind drops rooms without a match.
  {
    $lookup: {
      from: 'troupes',
      localField: '_id',
      foreignField: '_id',
      as: 'troupe'
    }
  },
  {
    $unwind: '$troupe'
  },
  {
    $match: {
      'troupe.oneToOne': { $ne: true }
    }
  },
  {
    $project: {
      _id: 1,
      uri: '$troupe.uri',
      retained: 1,
      lost: 1,
      new: 1
    }
  }
];

var a = db.chatmessages.aggregate(retentionPipeline);

printCSV(a, ['uri', 'retained', 'lost', 'new']);
This diff is collapsed.
// Allow this shell connection to read from a secondary replica-set member.
rs.slaveOk()
// Presumably defines printCSV, which this script calls at the end -- it is
// not defined in this file; verify against csv.js.
load('./csv.js');
/**
 * Builds an ObjectId from a point in time, for use as a bound in `_id`
 * range queries.
 *
 * The id's hex string is the whole seconds of `timestamp`, written in
 * hexadecimal and left-padded with zeros to at least 8 digits, followed by
 * 16 zero digits.
 *
 * @param {number|Date} timestamp - epoch milliseconds, or a Date (which
 *   coerces to epoch milliseconds under division).
 * @returns {ObjectId} the value returned by the shell's ObjectId().
 */
function createIdForTimestampString(timestamp) {
  var seconds = Math.floor(timestamp / 1000);
  var hex = seconds.toString(16);

  // Number of leading zeros needed to reach 8 hex digits (none if already
  // 8 or longer; longer values are passed through untouched).
  var missing = 8 - hex.length;
  var padding = missing > 0 ? new Array(missing + 1).join("0") : "";

  return ObjectId(padding + hex + "0000000000000000");
}
/**
 * Runs the per-room retention aggregation for one window of chat messages.
 *
 * Every (user, room) pair with messages whose _id lies strictly between the
 * ids derived from `start` and `end` is classified relative to `mid`:
 *   retained - first message before mid and last message at/after mid
 *   lost     - first message before mid and last message before mid
 *   new      - first message at/after mid
 * The per-room counts of each class are joined with the room document and
 * one-to-one rooms are dropped.
 *
 * @param {number|Date} start - lower bound, passed to createIdForTimestampString.
 * @param {Date} mid - boundary compared against each pair's first/last `sent`.
 * @param {number|Date} end - upper bound, passed to createIdForTimestampString.
 * @returns whatever db.chatmessages.aggregate returns; each result carries
 *   _id, uri, retained, lost and new.
 */
function aggregate(start, mid, end) {
  // Picks one of two labels depending on the `current` flag.
  function labelByCurrent(whenCurrent, whenNotCurrent) {
    return { $cond: [{ $eq: ['$current', true] }, whenCurrent, whenNotCurrent] };
  }

  // First element of `v` whose `type` equals the given label (missing if none).
  function firstEntryOfType(label) {
    return {
      $arrayElemAt: [{
        $filter: { input: '$v', as: 't', cond: { $eq: ['$$t.type', label] } }
      }, 0]
    };
  }

  // Value at `path`, or 0 when it is null/missing.
  function orZero(path) {
    return { $ifNull: [path, 0] };
  }

  var pipeline = [];

  // Messages inside the (start, end) id window that have a date in `sent`.
  pipeline.push({
    $match: {
      _id: {
        $lt: createIdForTimestampString(end),
        $gt: createIdForTimestampString(start)
      },
      sent: { $type: 'date' }
    }
  });

  // First and last message time of every (user, room) pair.
  pipeline.push({
    $group: {
      _id: {
        userId: '$fromUserId',
        troupeId: '$toTroupeId'
      },
      firstSent: { $min: '$sent' },
      lastSent: { $max: '$sent' }
    }
  });

  // initial: active before the boundary; current: active at/after it.
  pipeline.push({
    $project: {
      initial: { $cond: [{ $lt: ['$firstSent', mid] }, true, false] },
      current: { $cond: [{ $gte: ['$lastSent', mid] }, true, false] }
    }
  });

  pipeline.push({
    $project: {
      type: {
        $cond: [
          { $eq: ['$initial', true] },
          labelByCurrent('retained', 'lost'),
          labelByCurrent('new', 'impossible')
        ]
      }
    }
  });

  // Number of pairs per (room, type).
  pipeline.push({
    $group: {
      _id: {
        troupeId: '$_id.troupeId',
        type: '$type'
      },
      count: {
        $sum: 1
      }
    }
  });

  // One document per room carrying all of its {type, count} entries.
  pipeline.push({
    $group: {
      _id: '$_id.troupeId',
      v: {
        $addToSet: {
          type: '$_id.type',
          count: '$count'
        }
      }
    }
  });

  pipeline.push({
    $project: {
      retained: firstEntryOfType('retained'),
      lost: firstEntryOfType('lost'),
      new: firstEntryOfType('new')
    }
  });

  // Reduce each entry to its count, defaulting to 0 when the type is absent.
  pipeline.push({
    $project: {
      retained: orZero('$retained.count'),
      lost: orZero('$lost.count'),
      new: orZero('$new.count')
    }
  });

  // Attach the room document; $unwind drops rooms without a match.
  pipeline.push({
    $lookup: {
      from: 'troupes',
      localField: '_id',
      foreignField: '_id',
      as: 'troupe'
    }
  });
  pipeline.push({ $unwind: '$troupe' });

  pipeline.push({
    $match: {
      'troupe.oneToOne': { $ne: true }
    }
  });

  pipeline.push({
    $project: {
      _id: 1,
      uri: '$troupe.uri',
      retained: 1,
      lost: 1,
      new: 1
    }
  });

  return db.chatmessages.aggregate(pipeline);
}
// Compares room retention between two overlapping windows of two periods
// each, ending 2016-10-01 (UTC):
//   P1 = [startP1, endP1] split at midP1
//   P0 = [startP0, endP0] split at midP0, shifted one period earlier
// so that P0's second half is P1's first half. Each P1 row is extended with
// the matching P0 figures (matched by uri) and the result goes to printCSV.
var period = 86400000 * 30; // 30 days in milliseconds

var endP1 = new Date('2016-10-01T00:00:00Z');
var midP1 = new Date(endP1.valueOf() - period);
var endP0 = midP1;
var startP1 = new Date(midP1.valueOf() - period);
var midP0 = startP1;
var startP0 = new Date(midP0.valueOf() - period);

var p1 = aggregate(startP1, midP1, endP1);
var p0 = aggregate(startP0, midP0, endP0);

// Earlier-window rows indexed by room uri.
var lookup = {};
p0.forEach(function(previous) {
  lookup[previous.uri] = previous;
});

// Materialise the later window so its rows can be extended in place.
p1 = p1.toArray();

p1.forEach(function(current) {
  var previous = lookup[current.uri];
  if (!previous) {
    // Room has no row in the earlier window: leave the prev* fields unset.
    return;
  }

  current.prevRetained = previous.retained;
  current.prevLost = previous.lost;
  current.prevNew = previous.new;
});

printCSV(p1, ['uri', 'retained', 'lost', 'new', 'prevRetained', 'prevLost', 'prevNew']);
// Lifetime statistics for a hand-picked list of rooms.
//
// For every listed room: number of distinct senders, first and last message
// time, room lifetime in days, the average per-user span (days between a
// user's first and last message) and that average as a fraction of the room
// lifetime.

// Allow this shell connection to read from a secondary replica-set member.
rs.slaveOk();

// Rooms are looked up by `lcUri`. Every entry is written in lower case
// (assumes lcUri stores the lower-cased uri -- the field name and the other
// entries suggest so; confirm against the schema). 'JuliaLang/julia' was the
// only mixed-case entry and is lower-cased here so it can match.
var roomIds = db.troupes.find({ lcUri: { $in: [
  'marionettejs/backbone.marionette',
  'freecodecamp/freecodecamp',
  'angular/angular.js',
  'webpack/webpack',
  'gulpjs/gulp',
  'minio/minio',
  'julialang/julia',
  'home-assistant/home-assistant'
] }
}).map(function(f) { return f._id; });

// Room lifetime in days: span between the room's first and last message,
// converted from milliseconds (86400000 ms per day).
var roomLifetimeDays = { $divide: [{ $subtract: ['$lastSent', '$firstSent'] }, 86400000] };

var a = db.chatmessages.aggregate([{
  // Messages of the selected rooms that have a date in `sent`.
  $match: {
    toTroupeId: { $in: roomIds },
    sent: { $type: 'date' }
  }
}, {
  // First and last message time of every (user, room) pair.
  $group: {
    _id: {
      userId: '$fromUserId',
      troupeId: '$toTroupeId'
    },
    firstSent: { $min: '$sent' },
    lastSent: { $max: '$sent' }
  }
}, {
  // Per-user span in days.
  $project: {
    firstSent: 1,
    lastSent: 1,
    duration: { $divide: [{ $subtract: ['$lastSent', '$firstSent'] }, 86400000] }
  }
}, {
  // Roll the pairs up per room.
  $group: {
    _id: '$_id.troupeId',
    totalActiveUserCount: { $sum: 1 },
    firstSent: { $min: '$firstSent' },
    lastSent: { $max: '$lastSent' },
    avgDuration: { $avg: '$duration' }
  }
}, {
  $project: {
    firstSent: 1,
    lastSent: 1,
    totalActiveUserCount: 1,
    totalDuration: roomLifetimeDays,
    avgDuration: 1,
    // $divide fails the whole aggregation on a zero divisor, which happens
    // when a room's first and last message share the same timestamp. Report
    // null for such rooms instead of aborting the report.
    avgOverLifetime: {
      $cond: [
        { $eq: ['$lastSent', '$firstSent'] },
        null,
        { $divide: ['$avgDuration', roomLifetimeDays] }
      ]
    }
  }
}, {
  // Attach the room document; $unwind drops rooms without a match.
  $lookup: {
    from: "troupes",
    localField: "_id",
    foreignField: "_id",
    as: "troupe"
  }
}, {
  $unwind: "$troupe"
}, {
  $project: {
    _id: 1,
    uri: '$troupe.uri',
    firstSent: 1,
    lastSent: 1,
    totalActiveUserCount: 1,
    totalDuration: 1,
    avgDuration: 1,
    avgOverLifetime: 1
  }
}]);