From 45814466899f7c2160a7fc92430d965ab3332aaf Mon Sep 17 00:00:00 2001 From: Jialin Ding Date: Sat, 3 Feb 2018 17:26:34 -0800 Subject: [PATCH 1/7] Add cascades to APLinear --- Milan Moments Creation.ipynb | 2070 +++++++++++++++++ .../macrobase/pipeline/CubePipeline.java | 42 +- lib/cp.txt | 1 + lib/genCP.sh | 2 + lib/momentBench.json | 13 + lib/momentBench.sh | 2 + lib/pom.xml | 5 + .../macrobase/APLMomentSummarizerBench.java | 145 ++ .../futuredata/macrobase/RunConfig.java | 50 + .../summary/aplinear/APLMomentSummarizer.java | 113 + .../summary/aplinear/APLSummarizer.java | 14 +- .../summary/aplinear/APrioriLinear.java | 92 +- .../metrics/EstimatedGlobalRatioMetric.java | 67 + .../metrics/EstimatedSupportMetric.java | 133 ++ .../aplinear/metrics/QualityMetric.java | 38 + lib/src/main/resources/log4j.properties | 8 + 16 files changed, 2770 insertions(+), 25 deletions(-) create mode 100644 Milan Moments Creation.ipynb create mode 100644 lib/cp.txt create mode 100644 lib/genCP.sh create mode 100644 lib/momentBench.json create mode 100644 lib/momentBench.sh create mode 100644 lib/src/main/java/edu/stanford/futuredata/macrobase/APLMomentSummarizerBench.java create mode 100644 lib/src/main/java/edu/stanford/futuredata/macrobase/RunConfig.java create mode 100644 lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLMomentSummarizer.java create mode 100644 lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedGlobalRatioMetric.java create mode 100644 lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedSupportMetric.java create mode 100644 lib/src/main/resources/log4j.properties diff --git a/Milan Moments Creation.ipynb b/Milan Moments Creation.ipynb new file mode 100644 index 000000000..fe2e6b733 --- /dev/null +++ b/Milan Moments Creation.ipynb @@ -0,0 +1,2070 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "data = pd.read_csv('~/Downloads/sms-call-internet-mi-2013-11-01 2.txt', sep='\\t',\n", + " header=None, names = [\"Grid\", \"Time\", \"Country\", \"SMSin\", \"SMSout\",\n", + " \"Callin\", \"Callout\", \"Internet\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAD9CAYAAABA8iukAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEyFJREFUeJzt3WGMpdV93/Hvz1AcksYGEndKd1FByjYV9qqtPQIqV9U0\npLBA1CWVbWGhsLjUW8ngptVKzbp9QWViiVRJqakc1I3ZGqI0hNJErMI62w32KOoLMDi2vF6IzQiv\nw46wSbwYd2vF1pp/X9yz68t6Zufs3Jm9d2a+H+lqn+c857n33KO785tznnOfSVUhSVKPN427AZKk\ntcPQkCR1MzQkSd0MDUlSN0NDktTN0JAkdVsyNJLsTfJKki8PlV2S5GCSF9q/F7fyJLk/yVySLyV5\n59A5O1r9F5LsGCp/V5JD7Zz7k+RMryFJGp+ekcangG2nle0GnqyqLcCTbR/gBmBLe+wEHoBBAAB3\nA1cDVwF3D4XAA8AHh87btsRrSJLGZMnQqKo/AY6dVrwdeKhtPwTcPFT+cA08BVyU5FLgeuBgVR2r\nqleBg8C2duwtVfVUDb5l+PBpz7XQa0iSxmS51zSmqurltv0NYKptbwJeGqp3tJWdqfzoAuVneg1J\n0picP+oTVFUlWdV7kSz1Gkl2MpgO48ILL3zXZZddtprNGbvXX3+dN73JNQwLsW8WZ98szr6Br371\nq39ZVW9bqt5yQ+ObSS6tqpfbFNMrrXweGP6JvbmVzQMzp5XPtvLNC9Q/02v8iKraA+wBmJ6ermef\nfXaZb2ttmJ2dZWZmZtzNmEj2zeLsm8XZN5Dk6z31lhut+4CTK6B2AI8Pld/WVlFdA7zWppgOANcl\nubhdAL8OONCOfSfJNW3V1G2nPddCryFJGpMlRxpJfpfBKOGnkxxlsArqXuDRJHcAXwfe16rvB24E\n5oDvAh8AqKpjSe4Bnmn1PlpVJy+uf4jBCq0LgU+3B2d4DUnSmCwZGlX1/kUOXbtA3QLuXOR59gJ7\nFyh/FnjHAuXfWug1JEnjs7Gv/EiSzoqhIUnqZmhIkroZGpKkboaGJKmboSFJ6jbybUQk6Vy6fPcT\np7aP3HvTGFuyMRkakibecFBovJyekiR1MzQkSd0MDUlSN0NDktTN0JAkdTM0JEndXHIraSK5zHYy\nOdKQJHUzNCRJ3ZyekjQxznZKyluKnHuONCRJ3QwNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN0NDktTN\n0JAkdTM0JEnd/Ea4pLHyxoRriyMNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN0NDktTNJbeS1oXTl+76\nR5lWx0gjjST/NsnhJF9O8rtJfizJFUmeTjKX5PeSXNDqvrntz7Xjlw89z0da+VeSXD9Uvq2VzSXZ\nPUpbJUmjW3ZoJNkE/GtguqreAZwH3AL8GnBfVf0M8CpwRzvlDuDVVn5fq0eSK9t5bwe2Ab+Z5Lwk\n5wGfAG4ArgTe3+pKksZk1Gsa5wMXJjkf+HHgZeDngMfa8YeAm9v29rZPO35tkrTyR6rqe1X1NWAO\nuKo95qrqxar6PvBIqytJGpNlh0ZVzQO/Dvw5g7B4Dfg88O2qOtGqHQU2te1NwEvt3BOt/k8Nl592\nzmLlkqQxWfaF8CQXM/jN/wrg28D/ZDC9dM4l2QnsBJiammJ2dnYczThnjh8/vu7f43LZN4ub1L7Z\ntfXE0pWW4Wze66T2zSQaZfXUzwNfq6q/AEjy+8C7gYuSnN9GE5uB+VZ/HrgMONqms94KfGuo/KTh\ncxYrf4Oq2gPsAZienq6ZmZkR3tbkm52dZb2/x+WybxY3qX1z+yrdsPDIrTPddSe1bybRKNc0/hy4\nJsmPt2sT1wLPAZ8F3tPq7AAeb9v72j7t+Geqqlr5LW111RXAFuBzwDPAlrYa6wIGF8v3jdBeSdKI\nlj3SqKqnkzwG/ClwAvgCg9/2nwAeSfKrrezBdsqDwG8nmQOOMQgBqupwkkcZBM4J4M6q+gFAkruA\nAwxWZu2tqsPLba8kaXQjfbmvqu4G7j6t+EUGK59Or/tXwHsXeZ6PAR9boHw/sH+UNkqaPP4NjbXL\n24hIkroZGpKkboaGJKmboSFJ6mZoSJK6GRqSpG6GhiSpm6EhSepmaEiSuhkakqRuhoYkqZuhIUnq\nZmhIkrqNdJdbSerlnW3XB0cakqRuhoYkqZuhIUnqZmhIkrp5IVzSujR84f3IvTeNsSXriyMNSVI3\nQ0OS1M3QkCR1MzQkSd0MDUlSN0NDktTN0JAkdTM0JEnd/HKfpFXjnW3XH0cakqRuhoYkqZuhIUnq\nZmhIkroZGpKkboaGJKnbSKGR5KIkjyX5syTPJ/mHSS5JcjDJC+3fi1vdJLk/yVySLyV559Dz7Gj1\nX0iyY6j8XUkOtXPuT5JR2itJGs2oI42PA39UVX8X+HvA88Bu4Mmq2gI82fYBbgC2tMdO4AGAJJcA\ndwNXA1cBd58Mmlbng0PnbRuxvdLYXL77iVMPaa1admgkeSvwj4EHAarq+1X1bWA78FCr9hBwc9ve\nDjxcA08BFyW5FLgeOFhVx6rqVeAgsK0de0tVPVVVBTw89FySpDEYZaRxBfAXwH9P8oUkn0zyE8BU\nVb3c6nwDmGrbm4CXhs4/2srOVH50gXJJ0piMchuR84F3Ah+uqqeTfJwfTkUBUFWVpEZpYI8kOxlM\neTE1NcXs7Oxqv+RYHT9+fN2/x+WahL45NP/aqe2tm956anvX1hOntsfRxnH0zfB7Hqel3vckfG7W\nilFC4yhwtKqebvuPMQiNbya5tKpeblNMr7Tj88BlQ+dvbmXzwMxp5bOtfPMC9X9EVe0B9gBMT0/X\nzMzMQtXWjdnZWdb7e1yuSeib24euWRy5dWbJ8nNlHH1z+4Rcv1mqvyfhc7NWLHt6qqq+AbyU5Gdb\n0bXAc8A+4OQKqB3A4217H3BbW0V1DfBam8Y6AFyX5OJ2Afw64EA79p0k17RVU7cNPZckaQxGvcvt\nh4HfSXIB8CLwAQZB9GiSO4CvA+9rdfcDNwJzwHdbXarqWJJ7gGdavY9W1bG2/SHgU8CFwKfbQ5I0\nJiOFRlV9EZhe4NC1C9Qt4M5FnmcvsHeB8meBd4zSRknSyvEb4ZKkboaGJKmboSFJ6uafe5W0orxN\nyvpmaEha94aD7Mi9N42xJWuf01OSpG6GhiSpm9NT0gpwHl8bhSMNSVI3Q0OS1M3QkCR1MzQkSd0M\nDUlSN0NDktTN0JAkdTM0JEnd/HKftIr80p/WG0cakqRujjQkjcwR1cbhSEOS1M3QkCR1MzQkSd0M\nDUlSNy+ES8vkxV9tRIaGdBYMCm10hoakDWU4+I/ce9MYW7I2eU1DktTN0JAkdTM0JEndDA1JUjdD\nQ5LUzdVTkpbF5ccbkyMNSVI3RxrSGPhdAa1VjjQkSd1GDo0k5yX5QpI/bPtXJHk6yVyS30tyQSt/\nc9ufa8cvH3qOj7TyryS5fqh8WyubS7J71LZKkkazEiONXwaeH9r/NeC+qvoZ4FXgjlZ+B/BqK7+v\n1SPJlcAtwNuBbcBvtiA6D/gEcANwJfD+VldaVy7f/cSphzTpRgqNJJuBm4BPtv0APwc81qo8BNzc\ntre3fdrxa1v97cAjVfW9qvoaMAdc1R5zVfViVX0feKTVldYtA0STbtSRxn8B/h3wetv/KeDbVXWi\n7R8FNrXtTcBLAO34a63+qfLTzlmsXJI0JstePZXkF4BXqurzSWZWrknLastOYCfA1NQUs7Oz42zO\nqjt+/Pi6f4/Ltdp9s2vriaUrrZCVfh8r3Tfnsi9Wy8n+8P9Uv1GW3L4b+GdJbgR+DHgL8HHgoiTn\nt9HEZmC+1Z8HLgOOJjkfeCvwraHyk4bPWaz8DapqD7AHYHp6umZmZkZ4W5NvdnaW9f4el2u1++b2\nczhtdOTWmRV9vpXum3PZF6vm0P8DYNfWH/DhX5gZb1vWiGVPT1XVR6pqc1VdzuBC9meq6lbgs8B7\nWrUdwONte1/bpx3/TFVVK7+lra66AtgCfA54BtjSVmNd0F5j33LbK0ka3Wp8ue9XgEeS/CrwBeDB\nVv4g8NtJ5oBjDEKAqjqc5FHgOeAEcGdV/QAgyV3AAeA8YG9VHV6F9kqSOq1IaFTVLDDbtl9ksPLp\n9Dp/Bbx3kfM/BnxsgfL9wP6VaKMkaXR+I1yS1M3QkCR1MzQkSd0MDUlSN2+NLqmbtzeRIw1JUjdD\nQ5LUzdCQJHUzNCRJ3bwQLjX+3W5paY40JEndHGloQ3MJqXR2DA1JwunJXk5PSZK6GRqSpG5OT0k6\nI6/7aJihIU0o59g1iQwNaQH+wJYW5jUNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN1dPSdJpXD23OEca\nkqRujjSkJfiNaOmHHGlIkro50pD0IxxdaTGONCRJ3QwNSVI3Q0OS1M1rGtIa4PcGNCkcaUiSuhka\nkqRuyw6NJJcl+WyS55IcTvLLrfySJAeTvND+vbiVJ8n9SeaSfCnJO4eea0er/0KSHUPl70pyqJ1z\nf5KM8mal9eDy3U+84SGdS6Nc0zgB7KqqP03yk8DnkxwEbgeerKp7k+wGdgO/AtwAbGmPq4EHgKuT\nXALcDUwD1Z5nX1W92up8EHga2A9sAz49Qpu1Qa3nawLr+b1p8iw7NKrqZeDltv1/kzwPbAK2AzOt\n2kPALIPQ2A48XFUFPJXkoiSXtroHq+oYQAuebUlmgbdU1VOt/GHgZgwNjcjfznU2DOU3WpFrGkku\nB/4BgxHBVAsUgG8AU217E/DS0GlHW9mZyo8uUC5JGpORl9wm+evA/wL+TVV9Z/iyQ1VVkhr1NTra\nsBPYCTA1NcXs7Oxqv+RYHT9+fN2/x+VarG92bT1x7hszBmf6XCz1uTk0/9qp7V1bV7BRa8DUhX2f\nEf/fjRgaSf4ag8D4nar6/Vb8zSSXVtXLbfrplVY+D1w2dPrmVjbPD6ezTpbPtvLNC9T/EVW1B9gD\nMD09XTMzMwtVWzdmZ2dZ7+9xuRbrm9s3yJTUkVtnFj221Odmo/TRQnZtPcFvHFr6x+GZ+nejGGX1\nVIAHgeer6j8PHdoHnFwBtQN4fKj8traK6hrgtTaNdQC4LsnFbaXVdcCBduw7Sa5pr3Xb0HNJksZg\nlJHGu4FfAg4l+WIr+/fAvcCjSe4Avg68rx3bD9wIzAHfBT4AUFXHktwDPNPqffTkRXHgQ8CngAsZ\nXAD3Irh0Bl601WobZfXU/wEW+97EtQvUL+DORZ5rL7B3gfJngXcst43a2FwlJa08vxEuSepmaEiS\nuhkakqRu3hpd2sC87qOzZWhI69TpgbBr6wlu3/2Eq6o0EkNDkjq5pNnQkDYcp6Q0CkND68qh+dc2\n9O0wpNXm6ilJUjdDQ5LUzdCQJHXzmobWvOELuxvt70BI55ojDUlSN0NDktTN0JAkdTM0JEndDA1J\nUjdXT2lN8lYY0ngYGpK0DBv15oWGhtYMRxfS+BkamliGhDR5DA1NFINCmmyunpIkdTM0JEndnJ7S\nWDgNJa1NjjQkSd0MDUlSN6enJGlEG+mLfoaGzhmvY0hrn6GhVWVQSOuLoaEVZ1BI65ehoWXbSPO4\nkgYMDUlaQev9lylDQ2dlsaknp6SkjcHQ0IIMAUkLmfjQSLIN+DhwHvDJqrp3zE1atwwKaWWtx6mq\niQ6NJOcBnwD+KXAUeCbJvqp6brwtWz8MCklnY6JDA7gKmKuqFwGSPAJsBzZkaJz8Ab9r6wlmFiiX\nNLnWy6hj0kNjE/DS0P5R4OoxtWVFnekH/fAHygvP0vqzlgMkVTXuNiwqyXuAbVX1L9v+LwFXV9Vd\np9XbCexsuz8LfOWcNvTc+2ngL8fdiAll3yzOvlmcfQN/u6retlSlSR9pzAOXDe1vbmVvUFV7gD3n\nqlHjluTZqpoedzsmkX2zOPtmcfZNv0m/NfozwJYkVyS5ALgF2DfmNknShjXRI42qOpHkLuAAgyW3\ne6vq8JibJUkb1kSHBkBV7Qf2j7sdE2bDTMUtg32zOPtmcfZNp4m+EC5JmiyTfk1DkjRBDI01JMl7\nkxxO8nqS6dOOfSTJXJKvJLl+XG2cBEn+Y5L5JF9sjxvH3aZxSrKtfS7mkuwed3smSZIjSQ61z8mz\n427PWjDx1zT0Bl8G/jnw34YLk1zJYGXZ24G/Bfxxkr9TVT84902cGPdV1a+PuxHj5q14uvyTqtro\n39Ho5khjDamq56tqoS8ubgceqarvVdXXgDkGt2CRTt2Kp6q+D5y8FY+0LIbG+rDQ7VY2jaktk+Ku\nJF9KsjfJxeNuzBj52TizAv53ks+3O0toCU5PTZgkfwz8zQUO/Yeqevxct2dSnamfgAeAexj8QLgH\n+A3gX5y71mkN+UdVNZ/kbwAHk/xZVf3JuBs1yQyNCVNVP7+M07put7Ke9PZTkt8C/nCVmzPJNtxn\n42xU1Xz795Ukf8BgOs/QOAOnp9aHfcAtSd6c5ApgC/C5MbdpbJJcOrT7iwwWEGxU3opnEUl+IslP\nntwGrmNjf1a6ONJYQ5L8IvBfgbcBTyT5YlVdX1WHkzzK4O+MnADu3OArp/5Tkr/PYHrqCPCvxtuc\n8fFWPGc0BfxBEhj8LPwfVfVH423S5PMb4ZKkbk5PSZK6GRqSpG6GhiSpm6EhSepmaEiSuhkakqRu\nhoYkqZuhIUnq9v8BYuvezWR1bPsAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "np.log(data[\"Internet\"]).hist(bins=100)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "metric = \"Internet\"\n", + "data = data[[\"Grid\", \"Country\", metric]]\n", + "data = data[np.isfinite(data[metric])]\n", + "# data = data.head(1000000)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GridCountryminmaxm0m1m2m3m4m5m6m7m8
01320.0017870.0017871.00.0017873.194477e-065.709522e-091.020469e-111.823894e-143.259864e-175.826388e-201.041356e-22
11330.0261370.0261375.00.1306873.415825e-038.928086e-052.333572e-066.099355e-081.594214e-094.166866e-111.089111e-12
21394.65279119.834697144.01506.5654501.733675e+042.143301e+052.798359e+063.819016e+075.417785e+087.966347e+091.211974e+11
31460.0017870.02613714.00.2685236.844427e-031.785846e-044.667184e-061.219872e-073.188429e-098.333732e-112.178223e-12
41490.0273000.0273003.00.0819012.235946e-036.104237e-051.666485e-064.549582e-081.242057e-093.390873e-119.257241e-13
52320.0009220.0009221.00.0009228.492699e-077.826518e-107.212594e-136.646827e-166.125439e-195.644950e-225.202151e-25
62330.0273560.0273565.00.1367803.741749e-031.023592e-042.800137e-067.660049e-082.095482e-095.732396e-111.568153e-12
72394.65420519.886492144.01512.3629081.747867e+042.170730e+052.847356e+063.903768e+075.562676e+088.213929e+091.254549e+11
82460.0009220.02735614.00.2772467.486895e-032.047216e-045.600277e-061.532010e-074.190964e-091.146479e-103.136307e-12
92490.0273000.0273003.00.0819012.235946e-036.104237e-051.666485e-064.549582e-081.242057e-093.390873e-119.257241e-13
103330.0286530.0286535.00.1432654.104998e-031.176209e-043.370201e-069.656667e-082.766933e-097.928119e-112.271651e-12
113394.65571019.941626144.01518.5340731.763052e+042.200233e+052.900338e+063.995898e+075.720999e+088.485816e+091.301526e+11
123460.0286530.02865310.00.2865318.209995e-032.352417e-046.740402e-061.931333e-075.533867e-091.585624e-104.543302e-12
133490.0273000.0273003.00.0819012.235946e-036.104237e-051.666485e-064.549582e-081.242057e-093.390873e-119.257241e-13
144320.0042950.0042951.00.0042951.844681e-057.922861e-083.402850e-101.461516e-126.277173e-152.696031e-171.157939e-19
154330.0226080.0226085.00.1130392.555574e-035.777604e-051.306192e-062.953019e-086.676141e-101.509332e-113.412274e-13
164394.64869619.684672144.01489.7730061.692965e+042.065406e+052.660609e+063.583104e+075.018357e+087.290160e+091.096720e+11
174460.0042950.02260814.00.2432585.184936e-031.158690e-042.613745e-065.906622e-081.335253e-093.018674e-116.824554e-13
184490.0273000.0273003.00.0819012.235946e-036.104237e-051.666485e-064.549582e-081.242057e-093.390873e-119.257241e-13
195320.0049210.0049211.00.0049212.421239e-051.191397e-075.862398e-102.884657e-121.419427e-146.984444e-173.436772e-19
205330.0217270.0217275.00.1086362.360369e-035.128436e-051.114269e-062.421001e-085.260172e-101.142891e-112.483190e-13
215394.24979717.594501144.01356.4280541.401935e+041.553800e+051.816012e+062.215515e+072.805796e+083.677848e+094.980961e+10
225460.0049210.02172714.00.2369554.817588e-031.030453e-042.230882e-064.843155e-081.052091e-092.285810e-114.966393e-13
235490.0236550.0236553.00.0709641.678625e-033.970727e-059.392609e-072.221787e-085.255557e-101.243183e-112.940704e-13
246330.0286530.0286535.00.1432654.104998e-031.176209e-043.370201e-069.656667e-082.766933e-097.928119e-112.271651e-12
256394.65571019.941626144.01518.5340731.763052e+042.200233e+052.900338e+063.995898e+075.720999e+088.485816e+091.301526e+11
266460.0286530.02865310.00.2865318.209995e-032.352417e-046.740402e-061.931333e-075.533867e-091.585624e-104.543302e-12
276490.0273000.0273003.00.0819012.235946e-036.104237e-051.666485e-064.549582e-081.242057e-093.390873e-119.257241e-13
287330.0286530.0286535.00.1432654.104998e-031.176209e-043.370201e-069.656667e-082.766933e-097.928119e-112.271651e-12
297394.65571019.941626144.01518.5340731.763052e+042.200233e+052.900338e+063.995898e+075.720999e+088.485816e+091.301526e+11
..........................................
1325339997860.0859950.0859951.00.0859957.395101e-036.359400e-045.468752e-054.702841e-064.044197e-073.477798e-082.990725e-09
1325349997882390.0859950.0859952.00.1719901.479020e-021.271880e-031.093750e-049.405681e-068.088394e-076.955596e-085.981449e-09
1325359998330.0859950.0859951.00.0859957.395101e-036.359400e-045.468752e-054.702841e-064.044197e-073.477798e-082.990725e-09
1325369998340.0859950.0859952.00.1719901.479020e-021.271880e-031.093750e-049.405681e-068.088394e-076.955596e-085.981449e-09
13253799983919.02167181.737160144.05837.4915632.642833e+051.320879e+077.187426e+084.198497e+102.598923e+121.685744e+141.135115e+16
1325389998410.0859950.0859951.00.0859957.395101e-036.359400e-045.468752e-054.702841e-064.044197e-073.477798e-082.990725e-09
1325399998460.0859950.0859954.00.3439792.958040e-022.543760e-032.187501e-041.881136e-051.617679e-061.391119e-071.196290e-08
1325409998490.0859950.0859956.00.5159694.437061e-023.815640e-033.281251e-042.821704e-052.426518e-062.086679e-071.794435e-08
1325419998860.0859950.0859951.00.0859957.395101e-036.359400e-045.468752e-054.702841e-064.044197e-073.477798e-082.990725e-09
1325429998882390.0859950.0859952.00.1719901.479020e-021.271880e-031.093750e-049.405681e-068.088394e-076.955596e-085.981449e-09
1325439999330.0859950.2663014.00.5298309.407055e-022.091992e-025.207982e-031.354985e-033.580292e-049.509695e-052.530273e-05
1325449999340.0147080.1747624.00.3642314.603269e-026.676090e-031.049617e-031.732318e-042.938279e-055.057017e-068.769517e-07
13254599993910.34521363.231768144.03857.6627391.215812e+054.409394e+061.793299e+087.978308e+093.801183e+111.907161e+139.951139e+14
1325469999410.0859950.0887674.00.3522963.103386e-022.734283e-032.409512e-042.123691e-051.872100e-061.650596e-071.455544e-08
1325479999430.0887670.0887671.00.0887677.879586e-036.994475e-046.208788e-055.511357e-064.892268e-074.342721e-083.854905e-09
1325489999440.0887670.0887671.00.0887677.879586e-036.994475e-046.208788e-055.511357e-064.892268e-074.342721e-083.854905e-09
1325499999460.0147080.0859958.00.4028093.044565e-022.556486e-032.189372e-041.881412e-051.617719e-061.391125e-071.196291e-08
1325509999490.0147080.0859959.00.5600914.501954e-023.825184e-033.282655e-042.821911e-052.426549e-062.086683e-071.794435e-08
1325519999860.0859950.0859951.00.0859957.395101e-036.359400e-045.468752e-054.702841e-064.044197e-073.477798e-082.990725e-09
1325529999882390.0859950.0859952.00.1719901.479020e-021.271880e-031.093750e-049.405681e-068.088394e-076.955596e-085.981449e-09
13255310000330.0859950.6247364.01.1272214.844223e-012.625288e-011.561461e-019.595410e-025.961722e-023.717687e-022.321156e-02
13255410000340.0542310.2942404.00.6427111.402795e-013.530072e-029.439573e-032.602313e-037.309347e-042.079667e-045.972406e-05
13255510000398.17025572.936852144.03038.5935347.865887e+042.549735e+061.037753e+085.133570e+092.919976e+111.813755e+131.187581e+15
13255610000410.0859950.2082454.00.7107311.374934e-012.772831e-025.696546e-031.179593e-032.450699e-045.098521e-051.061318e-05
13255710000430.2082450.2082451.00.2082454.336611e-029.030789e-031.880620e-033.916302e-048.155516e-051.698348e-053.536730e-06
13255810000440.2082450.2082451.00.2082454.336611e-029.030789e-031.880620e-033.916302e-048.155516e-051.698348e-053.536730e-06
13255910000460.0542310.0859958.00.5609044.134455e-023.181745e-032.533488e-042.068770e-051.719435e-061.446303e-071.226217e-08
13256010000490.0542310.0859959.00.6786635.319371e-024.294129e-033.540742e-042.962430e-052.502835e-062.128067e-071.816880e-08
13256110000860.0859950.0859951.00.0859957.395101e-036.359400e-045.468752e-054.702841e-064.044197e-073.477798e-082.990725e-09
13256210000882390.0859950.0859952.00.1719901.479020e-021.271880e-031.093750e-049.405681e-068.088394e-076.955596e-085.981449e-09
\n", + "

132563 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " Grid Country min max m0 m1 \\\n", + "0 1 32 0.001787 0.001787 1.0 0.001787 \n", + "1 1 33 0.026137 0.026137 5.0 0.130687 \n", + "2 1 39 4.652791 19.834697 144.0 1506.565450 \n", + "3 1 46 0.001787 0.026137 14.0 0.268523 \n", + "4 1 49 0.027300 0.027300 3.0 0.081901 \n", + "5 2 32 0.000922 0.000922 1.0 0.000922 \n", + "6 2 33 0.027356 0.027356 5.0 0.136780 \n", + "7 2 39 4.654205 19.886492 144.0 1512.362908 \n", + "8 2 46 0.000922 0.027356 14.0 0.277246 \n", + "9 2 49 0.027300 0.027300 3.0 0.081901 \n", + "10 3 33 0.028653 0.028653 5.0 0.143265 \n", + "11 3 39 4.655710 19.941626 144.0 1518.534073 \n", + "12 3 46 0.028653 0.028653 10.0 0.286531 \n", + "13 3 49 0.027300 0.027300 3.0 0.081901 \n", + "14 4 32 0.004295 0.004295 1.0 0.004295 \n", + "15 4 33 0.022608 0.022608 5.0 0.113039 \n", + "16 4 39 4.648696 19.684672 144.0 1489.773006 \n", + "17 4 46 0.004295 0.022608 14.0 0.243258 \n", + "18 4 49 0.027300 0.027300 3.0 0.081901 \n", + "19 5 32 0.004921 0.004921 1.0 0.004921 \n", + "20 5 33 0.021727 0.021727 5.0 0.108636 \n", + "21 5 39 4.249797 17.594501 144.0 1356.428054 \n", + "22 5 46 0.004921 0.021727 14.0 0.236955 \n", + "23 5 49 0.023655 0.023655 3.0 0.070964 \n", + "24 6 33 0.028653 0.028653 5.0 0.143265 \n", + "25 6 39 4.655710 19.941626 144.0 1518.534073 \n", + "26 6 46 0.028653 0.028653 10.0 0.286531 \n", + "27 6 49 0.027300 0.027300 3.0 0.081901 \n", + "28 7 33 0.028653 0.028653 5.0 0.143265 \n", + "29 7 39 4.655710 19.941626 144.0 1518.534073 \n", + "... ... ... ... ... ... ... \n", + "132533 9997 86 0.085995 0.085995 1.0 0.085995 \n", + "132534 9997 88239 0.085995 0.085995 2.0 0.171990 \n", + "132535 9998 33 0.085995 0.085995 1.0 0.085995 \n", + "132536 9998 34 0.085995 0.085995 2.0 0.171990 \n", + "132537 9998 39 19.021671 81.737160 144.0 5837.491563 \n", + "132538 9998 41 0.085995 0.085995 1.0 0.085995 \n", + "132539 9998 46 0.085995 0.085995 4.0 0.343979 \n", + "132540 9998 49 0.085995 0.085995 6.0 0.515969 \n", + "132541 9998 86 0.085995 0.085995 1.0 0.085995 \n", + "132542 9998 88239 0.085995 0.085995 2.0 0.171990 \n", + "132543 9999 33 0.085995 0.266301 4.0 0.529830 \n", + "132544 9999 34 0.014708 0.174762 4.0 0.364231 \n", + "132545 9999 39 10.345213 63.231768 144.0 3857.662739 \n", + "132546 9999 41 0.085995 0.088767 4.0 0.352296 \n", + "132547 9999 43 0.088767 0.088767 1.0 0.088767 \n", + "132548 9999 44 0.088767 0.088767 1.0 0.088767 \n", + "132549 9999 46 0.014708 0.085995 8.0 0.402809 \n", + "132550 9999 49 0.014708 0.085995 9.0 0.560091 \n", + "132551 9999 86 0.085995 0.085995 1.0 0.085995 \n", + "132552 9999 88239 0.085995 0.085995 2.0 0.171990 \n", + "132553 10000 33 0.085995 0.624736 4.0 1.127221 \n", + "132554 10000 34 0.054231 0.294240 4.0 0.642711 \n", + "132555 10000 39 8.170255 72.936852 144.0 3038.593534 \n", + "132556 10000 41 0.085995 0.208245 4.0 0.710731 \n", + "132557 10000 43 0.208245 0.208245 1.0 0.208245 \n", + "132558 10000 44 0.208245 0.208245 1.0 0.208245 \n", + "132559 10000 46 0.054231 0.085995 8.0 0.560904 \n", + "132560 10000 49 0.054231 0.085995 9.0 0.678663 \n", + "132561 10000 86 0.085995 0.085995 1.0 0.085995 \n", + "132562 10000 88239 0.085995 0.085995 2.0 0.171990 \n", + "\n", + " m2 m3 m4 m5 m6 \\\n", + "0 3.194477e-06 5.709522e-09 1.020469e-11 1.823894e-14 3.259864e-17 \n", + "1 3.415825e-03 8.928086e-05 2.333572e-06 6.099355e-08 1.594214e-09 \n", + "2 1.733675e+04 2.143301e+05 2.798359e+06 3.819016e+07 5.417785e+08 \n", + "3 6.844427e-03 1.785846e-04 4.667184e-06 1.219872e-07 3.188429e-09 \n", + "4 2.235946e-03 6.104237e-05 1.666485e-06 4.549582e-08 1.242057e-09 \n", + "5 8.492699e-07 7.826518e-10 7.212594e-13 6.646827e-16 6.125439e-19 \n", + "6 3.741749e-03 1.023592e-04 2.800137e-06 7.660049e-08 2.095482e-09 \n", + "7 1.747867e+04 2.170730e+05 2.847356e+06 3.903768e+07 5.562676e+08 \n", + "8 7.486895e-03 2.047216e-04 5.600277e-06 1.532010e-07 4.190964e-09 \n", + "9 2.235946e-03 6.104237e-05 1.666485e-06 4.549582e-08 1.242057e-09 \n", + "10 4.104998e-03 1.176209e-04 3.370201e-06 9.656667e-08 2.766933e-09 \n", + "11 1.763052e+04 2.200233e+05 2.900338e+06 3.995898e+07 5.720999e+08 \n", + "12 8.209995e-03 2.352417e-04 6.740402e-06 1.931333e-07 5.533867e-09 \n", + "13 2.235946e-03 6.104237e-05 1.666485e-06 4.549582e-08 1.242057e-09 \n", + "14 1.844681e-05 7.922861e-08 3.402850e-10 1.461516e-12 6.277173e-15 \n", + "15 2.555574e-03 5.777604e-05 1.306192e-06 2.953019e-08 6.676141e-10 \n", + "16 1.692965e+04 2.065406e+05 2.660609e+06 3.583104e+07 5.018357e+08 \n", + "17 5.184936e-03 1.158690e-04 2.613745e-06 5.906622e-08 1.335253e-09 \n", + "18 2.235946e-03 6.104237e-05 1.666485e-06 4.549582e-08 1.242057e-09 \n", + "19 2.421239e-05 1.191397e-07 5.862398e-10 2.884657e-12 1.419427e-14 \n", + "20 2.360369e-03 5.128436e-05 1.114269e-06 2.421001e-08 5.260172e-10 \n", + "21 1.401935e+04 1.553800e+05 1.816012e+06 2.215515e+07 2.805796e+08 \n", + "22 4.817588e-03 1.030453e-04 2.230882e-06 4.843155e-08 1.052091e-09 \n", + "23 1.678625e-03 3.970727e-05 9.392609e-07 2.221787e-08 5.255557e-10 \n", + "24 4.104998e-03 1.176209e-04 3.370201e-06 9.656667e-08 2.766933e-09 \n", + "25 1.763052e+04 2.200233e+05 2.900338e+06 3.995898e+07 5.720999e+08 \n", + "26 8.209995e-03 2.352417e-04 6.740402e-06 1.931333e-07 5.533867e-09 \n", + "27 2.235946e-03 6.104237e-05 1.666485e-06 4.549582e-08 1.242057e-09 \n", + "28 4.104998e-03 1.176209e-04 3.370201e-06 9.656667e-08 2.766933e-09 \n", + "29 1.763052e+04 2.200233e+05 2.900338e+06 3.995898e+07 5.720999e+08 \n", + "... ... ... ... ... ... \n", + "132533 7.395101e-03 6.359400e-04 5.468752e-05 4.702841e-06 4.044197e-07 \n", + "132534 1.479020e-02 1.271880e-03 1.093750e-04 9.405681e-06 8.088394e-07 \n", + "132535 7.395101e-03 6.359400e-04 5.468752e-05 4.702841e-06 4.044197e-07 \n", + "132536 1.479020e-02 1.271880e-03 1.093750e-04 9.405681e-06 8.088394e-07 \n", + "132537 2.642833e+05 1.320879e+07 7.187426e+08 4.198497e+10 2.598923e+12 \n", + "132538 7.395101e-03 6.359400e-04 5.468752e-05 4.702841e-06 4.044197e-07 \n", + "132539 2.958040e-02 2.543760e-03 2.187501e-04 1.881136e-05 1.617679e-06 \n", + "132540 4.437061e-02 3.815640e-03 3.281251e-04 2.821704e-05 2.426518e-06 \n", + "132541 7.395101e-03 6.359400e-04 5.468752e-05 4.702841e-06 4.044197e-07 \n", + "132542 1.479020e-02 1.271880e-03 1.093750e-04 9.405681e-06 8.088394e-07 \n", + "132543 9.407055e-02 2.091992e-02 5.207982e-03 1.354985e-03 3.580292e-04 \n", + "132544 4.603269e-02 6.676090e-03 1.049617e-03 1.732318e-04 2.938279e-05 \n", + "132545 1.215812e+05 4.409394e+06 1.793299e+08 7.978308e+09 3.801183e+11 \n", + "132546 3.103386e-02 2.734283e-03 2.409512e-04 2.123691e-05 1.872100e-06 \n", + "132547 7.879586e-03 6.994475e-04 6.208788e-05 5.511357e-06 4.892268e-07 \n", + "132548 7.879586e-03 6.994475e-04 6.208788e-05 5.511357e-06 4.892268e-07 \n", + "132549 3.044565e-02 2.556486e-03 2.189372e-04 1.881412e-05 1.617719e-06 \n", + "132550 4.501954e-02 3.825184e-03 3.282655e-04 2.821911e-05 2.426549e-06 \n", + "132551 7.395101e-03 6.359400e-04 5.468752e-05 4.702841e-06 4.044197e-07 \n", + "132552 1.479020e-02 1.271880e-03 1.093750e-04 9.405681e-06 8.088394e-07 \n", + "132553 4.844223e-01 2.625288e-01 1.561461e-01 9.595410e-02 5.961722e-02 \n", + "132554 1.402795e-01 3.530072e-02 9.439573e-03 2.602313e-03 7.309347e-04 \n", + "132555 7.865887e+04 2.549735e+06 1.037753e+08 5.133570e+09 2.919976e+11 \n", + "132556 1.374934e-01 2.772831e-02 5.696546e-03 1.179593e-03 2.450699e-04 \n", + "132557 4.336611e-02 9.030789e-03 1.880620e-03 3.916302e-04 8.155516e-05 \n", + "132558 4.336611e-02 9.030789e-03 1.880620e-03 3.916302e-04 8.155516e-05 \n", + "132559 4.134455e-02 3.181745e-03 2.533488e-04 2.068770e-05 1.719435e-06 \n", + "132560 5.319371e-02 4.294129e-03 3.540742e-04 2.962430e-05 2.502835e-06 \n", + "132561 7.395101e-03 6.359400e-04 5.468752e-05 4.702841e-06 4.044197e-07 \n", + "132562 1.479020e-02 1.271880e-03 1.093750e-04 9.405681e-06 8.088394e-07 \n", + "\n", + " m7 m8 \n", + "0 5.826388e-20 1.041356e-22 \n", + "1 4.166866e-11 1.089111e-12 \n", + "2 7.966347e+09 1.211974e+11 \n", + "3 8.333732e-11 2.178223e-12 \n", + "4 3.390873e-11 9.257241e-13 \n", + "5 5.644950e-22 5.202151e-25 \n", + "6 5.732396e-11 1.568153e-12 \n", + "7 8.213929e+09 1.254549e+11 \n", + "8 1.146479e-10 3.136307e-12 \n", + "9 3.390873e-11 9.257241e-13 \n", + "10 7.928119e-11 2.271651e-12 \n", + "11 8.485816e+09 1.301526e+11 \n", + "12 1.585624e-10 4.543302e-12 \n", + "13 3.390873e-11 9.257241e-13 \n", + "14 2.696031e-17 1.157939e-19 \n", + "15 1.509332e-11 3.412274e-13 \n", + "16 7.290160e+09 1.096720e+11 \n", + "17 3.018674e-11 6.824554e-13 \n", + "18 3.390873e-11 9.257241e-13 \n", + "19 6.984444e-17 3.436772e-19 \n", + "20 1.142891e-11 2.483190e-13 \n", + "21 3.677848e+09 4.980961e+10 \n", + "22 2.285810e-11 4.966393e-13 \n", + "23 1.243183e-11 2.940704e-13 \n", + "24 7.928119e-11 2.271651e-12 \n", + "25 8.485816e+09 1.301526e+11 \n", + "26 1.585624e-10 4.543302e-12 \n", + "27 3.390873e-11 9.257241e-13 \n", + "28 7.928119e-11 2.271651e-12 \n", + "29 8.485816e+09 1.301526e+11 \n", + "... ... ... \n", + "132533 3.477798e-08 2.990725e-09 \n", + "132534 6.955596e-08 5.981449e-09 \n", + "132535 3.477798e-08 2.990725e-09 \n", + "132536 6.955596e-08 5.981449e-09 \n", + "132537 1.685744e+14 1.135115e+16 \n", + "132538 3.477798e-08 2.990725e-09 \n", + "132539 1.391119e-07 1.196290e-08 \n", + "132540 2.086679e-07 1.794435e-08 \n", + "132541 3.477798e-08 2.990725e-09 \n", + "132542 6.955596e-08 5.981449e-09 \n", + "132543 9.509695e-05 2.530273e-05 \n", + "132544 5.057017e-06 8.769517e-07 \n", + "132545 1.907161e+13 9.951139e+14 \n", + "132546 1.650596e-07 1.455544e-08 \n", + "132547 4.342721e-08 3.854905e-09 \n", + "132548 4.342721e-08 3.854905e-09 \n", + "132549 1.391125e-07 1.196291e-08 \n", + "132550 2.086683e-07 1.794435e-08 \n", + "132551 3.477798e-08 2.990725e-09 \n", + "132552 6.955596e-08 5.981449e-09 \n", + "132553 3.717687e-02 2.321156e-02 \n", + "132554 2.079667e-04 5.972406e-05 \n", + "132555 1.813755e+13 1.187581e+15 \n", + "132556 5.098521e-05 1.061318e-05 \n", + "132557 1.698348e-05 3.536730e-06 \n", + "132558 1.698348e-05 3.536730e-06 \n", + "132559 1.446303e-07 1.226217e-08 \n", + "132560 2.128067e-07 1.816880e-08 \n", + "132561 3.477798e-08 2.990725e-09 \n", + "132562 6.955596e-08 5.981449e-09 \n", + "\n", + "[132563 rows x 13 columns]" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def moment(n):\n", + " def moment_(x):\n", + " return np.sum(np.power(x, n))\n", + " moment_.__name__ = 'm%s' % n\n", + " return moment_\n", + "\n", + "cube = data.groupby([\"Grid\", \"Country\"]).agg({metric: [\n", + " 'min',\n", + " 'max',\n", + " moment(0),\n", + " moment(1),\n", + " moment(2),\n", + " moment(3),\n", + " moment(4),\n", + " moment(5),\n", + " moment(6),\n", + " moment(7),\n", + " moment(8)\n", + "]}).reset_index(col_level=1)\n", + "cube.columns = cube.columns.get_level_values(1)\n", + "cube" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "cube.to_csv('lib/src/test/resources/milan_moments_cubed.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GridCountrycountoutliers1outliers5outliers10
013210.00.00.0
113350.00.00.0
21391440.00.00.0
3146140.00.00.0
414930.00.00.0
523210.00.00.0
623350.00.00.0
72391440.00.00.0
8246140.00.00.0
924930.00.00.0
1033350.00.00.0
113391440.00.00.0
12346100.00.00.0
1334930.00.00.0
1443210.00.00.0
1543350.00.00.0
164391440.00.00.0
17446140.00.00.0
1844930.00.00.0
1953210.00.00.0
2053350.00.00.0
215391440.00.00.0
22546140.00.00.0
2354930.00.00.0
24103350.00.00.0
2510391440.00.00.0
261046100.00.00.0
27104930.00.00.0
28113350.00.00.0
2911391440.00.00.0
.....................
53293502749150.00.00.0
53294502790100.00.00.0
53295502735310.00.00.0
53296502737010.00.00.0
5329750278823910.00.00.0
532985028120.00.00.0
5329950283310.00.00.0
533005028391440.00.06.0
5330150284130.00.00.0
5330250284420.00.00.0
53303502846480.00.00.0
5330450284910.00.00.0
53305502835220.00.00.0
53306502835310.00.00.0
5330750293320.00.00.0
53308502939330.00.01.0
53309502946120.00.00.0
5331050294910.00.00.0
53311502935220.00.00.0
53312502935310.00.00.0
53313100003340.00.00.0
53314100003440.00.00.0
5331510000391440.00.01.0
53316100004140.00.00.0
53317100004310.00.00.0
53318100004410.00.00.0
53319100004680.00.00.0
53320100004990.00.00.0
53321100008610.00.00.0
53322100008823920.00.00.0
\n", + "

53323 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " Grid Country count outliers1 outliers5 outliers10\n", + "0 1 32 1 0.0 0.0 0.0\n", + "1 1 33 5 0.0 0.0 0.0\n", + "2 1 39 144 0.0 0.0 0.0\n", + "3 1 46 14 0.0 0.0 0.0\n", + "4 1 49 3 0.0 0.0 0.0\n", + "5 2 32 1 0.0 0.0 0.0\n", + "6 2 33 5 0.0 0.0 0.0\n", + "7 2 39 144 0.0 0.0 0.0\n", + "8 2 46 14 0.0 0.0 0.0\n", + "9 2 49 3 0.0 0.0 0.0\n", + "10 3 33 5 0.0 0.0 0.0\n", + "11 3 39 144 0.0 0.0 0.0\n", + "12 3 46 10 0.0 0.0 0.0\n", + "13 3 49 3 0.0 0.0 0.0\n", + "14 4 32 1 0.0 0.0 0.0\n", + "15 4 33 5 0.0 0.0 0.0\n", + "16 4 39 144 0.0 0.0 0.0\n", + "17 4 46 14 0.0 0.0 0.0\n", + "18 4 49 3 0.0 0.0 0.0\n", + "19 5 32 1 0.0 0.0 0.0\n", + "20 5 33 5 0.0 0.0 0.0\n", + "21 5 39 144 0.0 0.0 0.0\n", + "22 5 46 14 0.0 0.0 0.0\n", + "23 5 49 3 0.0 0.0 0.0\n", + "24 10 33 5 0.0 0.0 0.0\n", + "25 10 39 144 0.0 0.0 0.0\n", + "26 10 46 10 0.0 0.0 0.0\n", + "27 10 49 3 0.0 0.0 0.0\n", + "28 11 33 5 0.0 0.0 0.0\n", + "29 11 39 144 0.0 0.0 0.0\n", + "... ... ... ... ... ... ...\n", + "53293 5027 49 15 0.0 0.0 0.0\n", + "53294 5027 90 10 0.0 0.0 0.0\n", + "53295 5027 353 1 0.0 0.0 0.0\n", + "53296 5027 370 1 0.0 0.0 0.0\n", + "53297 5027 88239 1 0.0 0.0 0.0\n", + "53298 5028 1 2 0.0 0.0 0.0\n", + "53299 5028 33 1 0.0 0.0 0.0\n", + "53300 5028 39 144 0.0 0.0 6.0\n", + "53301 5028 41 3 0.0 0.0 0.0\n", + "53302 5028 44 2 0.0 0.0 0.0\n", + "53303 5028 46 48 0.0 0.0 0.0\n", + "53304 5028 49 1 0.0 0.0 0.0\n", + "53305 5028 352 2 0.0 0.0 0.0\n", + "53306 5028 353 1 0.0 0.0 0.0\n", + "53307 5029 33 2 0.0 0.0 0.0\n", + "53308 5029 39 33 0.0 0.0 1.0\n", + "53309 5029 46 12 0.0 0.0 0.0\n", + "53310 5029 49 1 0.0 0.0 0.0\n", + "53311 5029 352 2 0.0 0.0 0.0\n", + "53312 5029 353 1 0.0 0.0 0.0\n", + "53313 10000 33 4 0.0 0.0 0.0\n", + "53314 10000 34 4 0.0 0.0 0.0\n", + "53315 10000 39 144 0.0 0.0 1.0\n", + "53316 10000 41 4 0.0 0.0 0.0\n", + "53317 10000 43 1 0.0 0.0 0.0\n", + "53318 10000 44 1 0.0 0.0 0.0\n", + "53319 10000 46 8 0.0 0.0 0.0\n", + "53320 10000 49 9 0.0 0.0 0.0\n", + "53321 10000 86 1 0.0 0.0 0.0\n", + "53322 10000 88239 2 0.0 0.0 0.0\n", + "\n", + "[53323 rows x 6 columns]" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def outliers(t, name):\n", + " def outliers_(x):\n", + " return np.sum(x >= t)\n", + " outliers_.__name__ = 'outliers%s' % name\n", + " return outliers_\n", + "\n", + "t1 = data[metric].quantile(0.99)\n", + "t5 = data[metric].quantile(0.95)\n", + "t10 = data[metric].quantile(0.90)\n", + "oracle = data.groupby([\"Grid\", \"Country\"]).agg({metric: [\n", + " 'count',\n", + " outliers(t1, \"1\"),\n", + " outliers(t5, \"5\"),\n", + " outliers(t10, \"10\")\n", + "]}).reset_index(col_level=1)\n", + "oracle.columns = oracle.columns.get_level_values(1)\n", + "oracle" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "oracle.to_csv('lib/src/test/resources/milan_oracle_cubed.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.33149769953e-06\n", + "3.57891732062e-06\n" + ] + } + ], + "source": [ + "print(cube[\"min\"].min())\n", + "print(data[metric].min())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/core/src/main/java/edu/stanford/futuredata/macrobase/pipeline/CubePipeline.java b/core/src/main/java/edu/stanford/futuredata/macrobase/pipeline/CubePipeline.java index 65727097f..cfa0af29e 100644 --- a/core/src/main/java/edu/stanford/futuredata/macrobase/pipeline/CubePipeline.java +++ b/core/src/main/java/edu/stanford/futuredata/macrobase/pipeline/CubePipeline.java @@ -5,10 +5,7 @@ import edu.stanford.futuredata.macrobase.analysis.classify.PredicateCubeClassifier; import edu.stanford.futuredata.macrobase.analysis.classify.QuantileClassifier; import edu.stanford.futuredata.macrobase.analysis.classify.RawClassifier; -import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.APLExplanation; -import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.APLMeanSummarizer; -import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.APLOutlierSummarizer; -import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.APLSummarizer; +import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.*; import edu.stanford.futuredata.macrobase.datamodel.DataFrame; import edu.stanford.futuredata.macrobase.datamodel.Schema; import edu.stanford.futuredata.macrobase.ingest.CSVDataFrameWriter; @@ -50,7 +47,10 @@ public class CubePipeline implements Pipeline { private boolean includeLo; private Optional meanColumn; private Optional stdColumn; + private Optional minColumn; + private Optional maxColumn; private LinkedHashMap quantileColumns; + private List momentColumns; // Explanation private List attributes; @@ -88,7 +88,10 @@ public CubePipeline(PipelineConfig conf) { includeLo = conf.get("includeLo", true); meanColumn = Optional.ofNullable(conf.get("meanColumn")); stdColumn = Optional.ofNullable(conf.get("stdColumn")); + minColumn = Optional.ofNullable(conf.get("minColumn")); + maxColumn = Optional.ofNullable(conf.get("maxColumn")); quantileColumns = conf.get("quantileColumns", new LinkedHashMap()); + momentColumns = conf.get("momentColumns", new ArrayList()); attributes = conf.get("attributes"); minSupport = conf.get("minSupport", 3.0); @@ -173,6 +176,18 @@ private Map getColTypes() throws MacrobaseException { } return colTypes; } + case "moment": { + for (String col : momentColumns) { + colTypes.put(col, Schema.ColType.DOUBLE); + } + colTypes.put(minColumn + .orElseThrow(() -> new MacrobaseException("min column not present in config")), + Schema.ColType.DOUBLE); + colTypes.put(maxColumn + .orElseThrow(() -> new MacrobaseException("max column not present in config")), + Schema.ColType.DOUBLE); + return colTypes; + } case "raw": { colTypes.put(meanColumn.orElseThrow( () -> new MacrobaseException("mean column not present in config")), @@ -216,6 +231,12 @@ private CubeClassifier getClassifier() throws MacrobaseException { () -> new MacrobaseException("metric column not present in config")), predicateStr, cutoff); } + case "moment": { + return new RawClassifier( + countColumn, + null + ); + } case "meanshift": case "raw": { @@ -244,6 +265,19 @@ private APLSummarizer getSummarizer(CubeClassifier classifier) throws Exception summarizer.setMinStdDev(minRatioMetric); return summarizer; } + case "moment": { + APLMomentSummarizer summarizer = new APLMomentSummarizer(); + summarizer.setMinColumn(minColumn.orElseThrow( + () -> new MacrobaseException("min column not present in config"))); + summarizer.setMaxColumn(maxColumn.orElseThrow( + () -> new MacrobaseException("max column not present in config"))); + summarizer.setMomentColumns(momentColumns); + summarizer.setAttributes(attributes); + summarizer.setMinSupport(minSupport); + summarizer.setMinRatioMetric(minRatioMetric); + summarizer.setPercentile(cutoff); + return summarizer; + } default: { APLOutlierSummarizer summarizer = new APLOutlierSummarizer(); summarizer.setOutlierColumn(classifier.getOutputColumnName()); diff --git a/lib/cp.txt b/lib/cp.txt new file mode 100644 index 000000000..572ce2fa9 --- /dev/null +++ b/lib/cp.txt @@ -0,0 +1 @@ +/Users/Jialin/.m2/repository/junit/junit/4.12/junit-4.12.jar:/Users/Jialin/.m2/repository/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar:/Users/Jialin/.m2/repository/log4j/log4j/1.2.17/log4j-1.2.17.jar:/Users/Jialin/.m2/repository/org/slf4j/slf4j-log4j12/1.8.0-beta0/slf4j-log4j12-1.8.0-beta0.jar:/Users/Jialin/.m2/repository/org/slf4j/slf4j-api/1.8.0-beta0/slf4j-api-1.8.0-beta0.jar:/Users/Jialin/.m2/repository/com/google/guava/guava/21.0/guava-21.0.jar:/Users/Jialin/.m2/repository/org/apache/commons/commons-math3/3.6/commons-math3-3.6.jar:/Users/Jialin/.m2/repository/com/univocity/univocity-parsers/2.5.9/univocity-parsers-2.5.9.jar:/Users/Jialin/.m2/repository/com/fasterxml/jackson/core/jackson-databind/2.8.9/jackson-databind-2.8.9.jar:/Users/Jialin/.m2/repository/com/fasterxml/jackson/core/jackson-annotations/2.8.0/jackson-annotations-2.8.0.jar:/Users/Jialin/.m2/repository/com/fasterxml/jackson/core/jackson-core/2.8.9/jackson-core-2.8.9.jar:/Users/Jialin/.m2/repository/futuredata/java-msketch/1.0-SNAPSHOT/java-msketch-1.0-SNAPSHOT.jar:/Users/Jialin/.m2/repository/org/apache/commons/commons-csv/1.5/commons-csv-1.5.jar:/Users/Jialin/.m2/repository/com/tdunning/t-digest/3.2/t-digest-3.2.jar:/Users/Jialin/.m2/repository/com/yahoo/datasketches/sketches-core/0.10.3/sketches-core-0.10.3.jar:/Users/Jialin/.m2/repository/com/yahoo/datasketches/memory/0.10.3/memory-0.10.3.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-catalyst_2.11/2.2.1/spark-catalyst_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/scala-lang/scala-reflect/2.11.8/scala-reflect-2.11.8.jar:/Users/Jialin/.m2/repository/org/scala-lang/scala-library/2.11.8/scala-library-2.11.8.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-core_2.11/2.2.1/spark-core_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/apache/avro/avro/1.7.7/avro-1.7.7.jar:/Users/Jialin/.m2/repository/org/codehaus/jackson/jackson-core-asl/1.9.13/jackson-core-asl-1.9.13.jar:/Users/Jialin/.m2/repository/org/codehaus/jackson/jackson-mapper-asl/1.9.13/jackson-mapper-asl-1.9.13.jar:/Users/Jialin/.m2/repository/com/thoughtworks/paranamer/paranamer/2.3/paranamer-2.3.jar:/Users/Jialin/.m2/repository/org/apache/commons/commons-compress/1.4.1/commons-compress-1.4.1.jar:/Users/Jialin/.m2/repository/org/tukaani/xz/1.0/xz-1.0.jar:/Users/Jialin/.m2/repository/org/apache/avro/avro-mapred/1.7.7/avro-mapred-1.7.7-hadoop2.jar:/Users/Jialin/.m2/repository/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7.jar:/Users/Jialin/.m2/repository/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7-tests.jar:/Users/Jialin/.m2/repository/com/twitter/chill_2.11/0.8.0/chill_2.11-0.8.0.jar:/Users/Jialin/.m2/repository/com/esotericsoftware/kryo-shaded/3.0.3/kryo-shaded-3.0.3.jar:/Users/Jialin/.m2/repository/com/esotericsoftware/minlog/1.3.0/minlog-1.3.0.jar:/Users/Jialin/.m2/repository/org/objenesis/objenesis/2.1/objenesis-2.1.jar:/Users/Jialin/.m2/repository/com/twitter/chill-java/0.8.0/chill-java-0.8.0.jar:/Users/Jialin/.m2/repository/org/apache/xbean/xbean-asm5-shaded/4.4/xbean-asm5-shaded-4.4.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-client/2.6.5/hadoop-client-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-common/2.6.5/hadoop-common-2.6.5.jar:/Users/Jialin/.m2/repository/commons-cli/commons-cli/1.2/commons-cli-1.2.jar:/Users/Jialin/.m2/repository/xmlenc/xmlenc/0.52/xmlenc-0.52.jar:/Users/Jialin/.m2/repository/commons-httpclient/commons-httpclient/3.1/commons-httpclient-3.1.jar:/Users/Jialin/.m2/repository/commons-io/commons-io/2.4/commons-io-2.4.jar:/Users/Jialin/.m2/repository/commons-collections/commons-collections/3.2.2/commons-collections-3.2.2.jar:/Users/Jialin/.m2/repository/commons-lang/commons-lang/2.6/commons-lang-2.6.jar:/Users/Jialin/.m2/repository/commons-configuration/commons-configuration/1.6/commons-configuration-1.6.jar:/Users/Jialin/.m2/repository/commons-digester/commons-digester/1.8/commons-digester-1.8.jar:/Users/Jialin/.m2/repository/commons-beanutils/commons-beanutils/1.7.0/commons-beanutils-1.7.0.jar:/Users/Jialin/.m2/repository/commons-beanutils/commons-beanutils-core/1.8.0/commons-beanutils-core-1.8.0.jar:/Users/Jialin/.m2/repository/com/google/protobuf/protobuf-java/2.5.0/protobuf-java-2.5.0.jar:/Users/Jialin/.m2/repository/com/google/code/gson/gson/2.2.4/gson-2.2.4.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-auth/2.6.5/hadoop-auth-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/directory/server/apacheds-kerberos-codec/2.0.0-M15/apacheds-kerberos-codec-2.0.0-M15.jar:/Users/Jialin/.m2/repository/org/apache/directory/server/apacheds-i18n/2.0.0-M15/apacheds-i18n-2.0.0-M15.jar:/Users/Jialin/.m2/repository/org/apache/directory/api/api-asn1-api/1.0.0-M20/api-asn1-api-1.0.0-M20.jar:/Users/Jialin/.m2/repository/org/apache/directory/api/api-util/1.0.0-M20/api-util-1.0.0-M20.jar:/Users/Jialin/.m2/repository/org/apache/curator/curator-client/2.6.0/curator-client-2.6.0.jar:/Users/Jialin/.m2/repository/org/htrace/htrace-core/3.0.4/htrace-core-3.0.4.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-hdfs/2.6.5/hadoop-hdfs-2.6.5.jar:/Users/Jialin/.m2/repository/org/mortbay/jetty/jetty-util/6.1.26/jetty-util-6.1.26.jar:/Users/Jialin/.m2/repository/xerces/xercesImpl/2.9.1/xercesImpl-2.9.1.jar:/Users/Jialin/.m2/repository/xml-apis/xml-apis/1.3.04/xml-apis-1.3.04.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-app/2.6.5/hadoop-mapreduce-client-app-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-common/2.6.5/hadoop-mapreduce-client-common-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-yarn-client/2.6.5/hadoop-yarn-client-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-yarn-server-common/2.6.5/hadoop-yarn-server-common-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-shuffle/2.6.5/hadoop-mapreduce-client-shuffle-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-yarn-api/2.6.5/hadoop-yarn-api-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-core/2.6.5/hadoop-mapreduce-client-core-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-yarn-common/2.6.5/hadoop-yarn-common-2.6.5.jar:/Users/Jialin/.m2/repository/javax/xml/bind/jaxb-api/2.2.2/jaxb-api-2.2.2.jar:/Users/Jialin/.m2/repository/javax/xml/stream/stax-api/1.0-2/stax-api-1.0-2.jar:/Users/Jialin/.m2/repository/org/codehaus/jackson/jackson-jaxrs/1.9.13/jackson-jaxrs-1.9.13.jar:/Users/Jialin/.m2/repository/org/codehaus/jackson/jackson-xc/1.9.13/jackson-xc-1.9.13.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-jobclient/2.6.5/hadoop-mapreduce-client-jobclient-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-annotations/2.6.5/hadoop-annotations-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-launcher_2.11/2.2.1/spark-launcher_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-network-common_2.11/2.2.1/spark-network-common_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/fusesource/leveldbjni/leveldbjni-all/1.8/leveldbjni-all-1.8.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-network-shuffle_2.11/2.2.1/spark-network-shuffle_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/net/java/dev/jets3t/jets3t/0.9.3/jets3t-0.9.3.jar:/Users/Jialin/.m2/repository/org/apache/httpcomponents/httpcore/4.3.3/httpcore-4.3.3.jar:/Users/Jialin/.m2/repository/org/apache/httpcomponents/httpclient/4.3.6/httpclient-4.3.6.jar:/Users/Jialin/.m2/repository/javax/activation/activation/1.1.1/activation-1.1.1.jar:/Users/Jialin/.m2/repository/mx4j/mx4j/3.0.2/mx4j-3.0.2.jar:/Users/Jialin/.m2/repository/javax/mail/mail/1.4.7/mail-1.4.7.jar:/Users/Jialin/.m2/repository/org/bouncycastle/bcprov-jdk15on/1.51/bcprov-jdk15on-1.51.jar:/Users/Jialin/.m2/repository/com/jamesmurty/utils/java-xmlbuilder/1.0/java-xmlbuilder-1.0.jar:/Users/Jialin/.m2/repository/net/iharder/base64/2.3.8/base64-2.3.8.jar:/Users/Jialin/.m2/repository/org/apache/curator/curator-recipes/2.6.0/curator-recipes-2.6.0.jar:/Users/Jialin/.m2/repository/org/apache/curator/curator-framework/2.6.0/curator-framework-2.6.0.jar:/Users/Jialin/.m2/repository/org/apache/zookeeper/zookeeper/3.4.6/zookeeper-3.4.6.jar:/Users/Jialin/.m2/repository/javax/servlet/javax.servlet-api/3.1.0/javax.servlet-api-3.1.0.jar:/Users/Jialin/.m2/repository/org/apache/commons/commons-lang3/3.5/commons-lang3-3.5.jar:/Users/Jialin/.m2/repository/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9.jar:/Users/Jialin/.m2/repository/org/slf4j/jul-to-slf4j/1.7.16/jul-to-slf4j-1.7.16.jar:/Users/Jialin/.m2/repository/org/slf4j/jcl-over-slf4j/1.7.16/jcl-over-slf4j-1.7.16.jar:/Users/Jialin/.m2/repository/com/ning/compress-lzf/1.0.3/compress-lzf-1.0.3.jar:/Users/Jialin/.m2/repository/org/xerial/snappy/snappy-java/1.1.2.6/snappy-java-1.1.2.6.jar:/Users/Jialin/.m2/repository/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar:/Users/Jialin/.m2/repository/org/roaringbitmap/RoaringBitmap/0.5.11/RoaringBitmap-0.5.11.jar:/Users/Jialin/.m2/repository/commons-net/commons-net/2.2/commons-net-2.2.jar:/Users/Jialin/.m2/repository/org/json4s/json4s-jackson_2.11/3.2.11/json4s-jackson_2.11-3.2.11.jar:/Users/Jialin/.m2/repository/org/json4s/json4s-core_2.11/3.2.11/json4s-core_2.11-3.2.11.jar:/Users/Jialin/.m2/repository/org/json4s/json4s-ast_2.11/3.2.11/json4s-ast_2.11-3.2.11.jar:/Users/Jialin/.m2/repository/org/scala-lang/scalap/2.11.0/scalap-2.11.0.jar:/Users/Jialin/.m2/repository/org/scala-lang/scala-compiler/2.11.0/scala-compiler-2.11.0.jar:/Users/Jialin/.m2/repository/org/scala-lang/modules/scala-xml_2.11/1.0.1/scala-xml_2.11-1.0.1.jar:/Users/Jialin/.m2/repository/org/scala-lang/modules/scala-parser-combinators_2.11/1.0.1/scala-parser-combinators_2.11-1.0.1.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/core/jersey-client/2.22.2/jersey-client-2.22.2.jar:/Users/Jialin/.m2/repository/javax/ws/rs/javax.ws.rs-api/2.0.1/javax.ws.rs-api-2.0.1.jar:/Users/Jialin/.m2/repository/org/glassfish/hk2/hk2-api/2.4.0-b34/hk2-api-2.4.0-b34.jar:/Users/Jialin/.m2/repository/org/glassfish/hk2/hk2-utils/2.4.0-b34/hk2-utils-2.4.0-b34.jar:/Users/Jialin/.m2/repository/org/glassfish/hk2/external/aopalliance-repackaged/2.4.0-b34/aopalliance-repackaged-2.4.0-b34.jar:/Users/Jialin/.m2/repository/org/glassfish/hk2/external/javax.inject/2.4.0-b34/javax.inject-2.4.0-b34.jar:/Users/Jialin/.m2/repository/org/glassfish/hk2/hk2-locator/2.4.0-b34/hk2-locator-2.4.0-b34.jar:/Users/Jialin/.m2/repository/org/javassist/javassist/3.18.1-GA/javassist-3.18.1-GA.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/core/jersey-common/2.22.2/jersey-common-2.22.2.jar:/Users/Jialin/.m2/repository/javax/annotation/javax.annotation-api/1.2/javax.annotation-api-1.2.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/bundles/repackaged/jersey-guava/2.22.2/jersey-guava-2.22.2.jar:/Users/Jialin/.m2/repository/org/glassfish/hk2/osgi-resource-locator/1.0.1/osgi-resource-locator-1.0.1.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/core/jersey-server/2.22.2/jersey-server-2.22.2.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/media/jersey-media-jaxb/2.22.2/jersey-media-jaxb-2.22.2.jar:/Users/Jialin/.m2/repository/javax/validation/validation-api/1.1.0.Final/validation-api-1.1.0.Final.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/containers/jersey-container-servlet/2.22.2/jersey-container-servlet-2.22.2.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/containers/jersey-container-servlet-core/2.22.2/jersey-container-servlet-core-2.22.2.jar:/Users/Jialin/.m2/repository/io/netty/netty-all/4.0.43.Final/netty-all-4.0.43.Final.jar:/Users/Jialin/.m2/repository/io/netty/netty/3.9.9.Final/netty-3.9.9.Final.jar:/Users/Jialin/.m2/repository/com/clearspring/analytics/stream/2.7.0/stream-2.7.0.jar:/Users/Jialin/.m2/repository/io/dropwizard/metrics/metrics-core/3.1.2/metrics-core-3.1.2.jar:/Users/Jialin/.m2/repository/io/dropwizard/metrics/metrics-jvm/3.1.2/metrics-jvm-3.1.2.jar:/Users/Jialin/.m2/repository/io/dropwizard/metrics/metrics-json/3.1.2/metrics-json-3.1.2.jar:/Users/Jialin/.m2/repository/io/dropwizard/metrics/metrics-graphite/3.1.2/metrics-graphite-3.1.2.jar:/Users/Jialin/.m2/repository/com/fasterxml/jackson/module/jackson-module-scala_2.11/2.6.5/jackson-module-scala_2.11-2.6.5.jar:/Users/Jialin/.m2/repository/com/fasterxml/jackson/module/jackson-module-paranamer/2.6.5/jackson-module-paranamer-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/ivy/ivy/2.4.0/ivy-2.4.0.jar:/Users/Jialin/.m2/repository/oro/oro/2.0.8/oro-2.0.8.jar:/Users/Jialin/.m2/repository/net/razorvine/pyrolite/4.13/pyrolite-4.13.jar:/Users/Jialin/.m2/repository/net/sf/py4j/py4j/0.10.4/py4j-0.10.4.jar:/Users/Jialin/.m2/repository/org/apache/commons/commons-crypto/1.0.0/commons-crypto-1.0.0.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-tags_2.11/2.2.1/spark-tags_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-unsafe_2.11/2.2.1/spark-unsafe_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-sketch_2.11/2.2.1/spark-sketch_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/codehaus/janino/janino/3.0.0/janino-3.0.0.jar:/Users/Jialin/.m2/repository/org/codehaus/janino/commons-compiler/3.0.0/commons-compiler-3.0.0.jar:/Users/Jialin/.m2/repository/org/antlr/antlr4-runtime/4.5.3/antlr4-runtime-4.5.3.jar:/Users/Jialin/.m2/repository/commons-codec/commons-codec/1.10/commons-codec-1.10.jar:/Users/Jialin/.m2/repository/org/spark-project/spark/unused/1.0.0/unused-1.0.0.jar \ No newline at end of file diff --git a/lib/genCP.sh b/lib/genCP.sh new file mode 100644 index 000000000..25afe7f0d --- /dev/null +++ b/lib/genCP.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +mvn dependency:build-classpath -Dmdep.outputFile=cp.txt \ No newline at end of file diff --git a/lib/momentBench.json b/lib/momentBench.json new file mode 100644 index 000000000..0c73062de --- /dev/null +++ b/lib/momentBench.json @@ -0,0 +1,13 @@ +{ + "minSupport": 0.005, + "percentile": 1.0, + "outlierColumn": "outliers1", + "numWarmupTrials": 10, + "numTrials": 10, + "oracleCubeFilename": "src/test/resources/milan_oracle_cubed.csv", + "momentCubeFilename": "src/test/resources/milan_moments_cubed.csv", + "attributes": [ + "Grid", + "Country" + ] +} \ No newline at end of file diff --git a/lib/momentBench.sh b/lib/momentBench.sh new file mode 100644 index 000000000..d49b022d0 --- /dev/null +++ b/lib/momentBench.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +java -Xmx8g -Xms8g -cp target/macrobase-lib-0.2.1-SNAPSHOT.jar:$(cat cp.txt) edu.stanford.futuredata.macrobase.APLMomentSummarizerBench $@ \ No newline at end of file diff --git a/lib/pom.xml b/lib/pom.xml index 4138f6a4e..55df85f61 100644 --- a/lib/pom.xml +++ b/lib/pom.xml @@ -81,6 +81,11 @@ jackson-databind 2.8.9 + + futuredata + java-msketch + 1.0-SNAPSHOT + diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/APLMomentSummarizerBench.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/APLMomentSummarizerBench.java new file mode 100644 index 000000000..95ea31e5e --- /dev/null +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/APLMomentSummarizerBench.java @@ -0,0 +1,145 @@ +package edu.stanford.futuredata.macrobase; + +import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.APLExplanation; +import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.APLMomentSummarizer; +import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.APLOutlierSummarizer; +import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics.EstimatedSupportMetric; +import edu.stanford.futuredata.macrobase.datamodel.DataFrame; +import edu.stanford.futuredata.macrobase.datamodel.Schema; +import edu.stanford.futuredata.macrobase.ingest.CSVDataFrameParser; + +import java.io.IOException; +import java.util.*; + +public class APLMomentSummarizerBench { + double minSupport = 0.1; + double percentile = 1.0; + String outlierColumn = "outliers1"; + int numWarmupTrials; + int numTrials; + int numMoments; + String oracleCubeFilename; + String momentCubeFilename; + boolean doContainment; + List attributes; + + public APLMomentSummarizerBench(String confFile) throws IOException { + RunConfig conf = RunConfig.fromJsonFile(confFile); + minSupport = conf.get("minSupport"); + percentile = conf.get("percentile"); + outlierColumn = conf.get("outlierColumn"); + numWarmupTrials = conf.get("numWarmupTrials", 10); + numTrials = conf.get("numTrials", 10); + numMoments = conf.get("numMoments", 8); + oracleCubeFilename = conf.get("oracleCubeFilename"); + momentCubeFilename = conf.get("momentCubeFilename"); + doContainment = conf.get("doContainment", false); + attributes = conf.get("attributes"); + } + + public static void main(String[] args) throws Exception { + String confFile = args[0]; + APLMomentSummarizerBench bench = new APLMomentSummarizerBench(confFile); + bench.run(); + } + + public void run() throws Exception { + System.out.format("minSupport: %f, percentile: %f\n\n", minSupport, percentile); + testOracleOrder3(); + testCubeOrder3(true); + testCubeOrder3(false); + } + + public void testOracleOrder3() throws Exception { + List requiredColumns = new ArrayList<>(attributes); +// requiredColumns.add("Grid"); +// requiredColumns.add("Country"); + Map colTypes = new HashMap<>(); + colTypes.put("count", Schema.ColType.DOUBLE); + colTypes.put(outlierColumn, Schema.ColType.DOUBLE); + requiredColumns.add("count"); + requiredColumns.add(outlierColumn); + CSVDataFrameParser loader = new CSVDataFrameParser(oracleCubeFilename, requiredColumns); + loader.setColumnTypes(colTypes); + DataFrame df = loader.load(); + + APLOutlierSummarizer summ = new APLOutlierSummarizer(); + summ.setCountColumn("count"); + summ.setOutlierColumn(outlierColumn); + summ.setMinSupport(minSupport); + summ.setMinRatioMetric(10.0); + summ.setAttributes(attributes); + for (int i = 0; i < numWarmupTrials; i++) { + summ.process(df); + } + long start = System.nanoTime(); + for (int i = 0; i < numTrials; i++) { + summ.process(df); + } + long timeElapsed = System.nanoTime() - start; + System.out.format("Oracle time: %g\n", timeElapsed / (1.e9 * numTrials)); + APLExplanation e = summ.getResults(); + System.out.format("Num results: %d\n\n", e.getResults().size()); +// System.out.println(e.prettyPrint()); + } + + public void testCubeOrder3(boolean useCascade) throws Exception { + List requiredColumns = new ArrayList<>(attributes); +// requiredColumns.add("Grid"); +// requiredColumns.add("Country"); + Map colTypes = new HashMap<>(); + List momentColumns = new ArrayList<>(); + for (int i = 0; i <= numMoments; i++) { + colTypes.put("m" + i, Schema.ColType.DOUBLE); + requiredColumns.add("m" + i); + momentColumns.add("m" + i); + } + colTypes.put("min", Schema.ColType.DOUBLE); + colTypes.put("max", Schema.ColType.DOUBLE); + requiredColumns.add("min"); + requiredColumns.add("max"); + CSVDataFrameParser loader = new CSVDataFrameParser(momentCubeFilename, requiredColumns); + loader.setColumnTypes(colTypes); + DataFrame df = loader.load(); + + APLMomentSummarizer summ = new APLMomentSummarizer(); + summ.setMinSupport(minSupport); + summ.setMinRatioMetric(10.0); + summ.setAttributes(attributes); + summ.setMinColumn("min"); + summ.setMaxColumn("max"); + summ.setMomentColumns(momentColumns); + summ.setPercentile(percentile); + summ.setCascade(useCascade); + summ.setDoContainment(doContainment); + for (int i = 0; i < numWarmupTrials; i++) { + summ.process(df); + } + long start = System.nanoTime(); + for (int i = 0; i < numTrials; i++) { + summ.process(df); + } + long timeElapsed = System.nanoTime() - start; + System.out.format("%s time: %g\n", useCascade ? "Cascade" : "Maxent", timeElapsed / (1.e9 * numTrials)); + if (useCascade) { + EstimatedSupportMetric metric = (EstimatedSupportMetric)summ.qualityMetricList.get(0); + int prunedByNaive = metric.numEnterCascade - metric.numAfterNaiveCheck; + int prunedByMarkov = metric.numAfterNaiveCheck - metric.numAfterMarkovBound; + int prunedByMoments = metric.numAfterMarkovBound - metric.numAfterMomentBound; + System.out.format("Cascade PTR\n\t" + + "Entered cascade: %d\n\t" + + "Pruned by naive checks: %d (%f)\n\t" + + "Pruned by Markov bounds: %d (%f)\n\t" + + "Pruned by moment bounds: %d (%f)\n\t" + + "Reached maxent: %d (%f)\n", + metric.numEnterCascade, + prunedByNaive, prunedByNaive / (double)metric.numEnterCascade, + prunedByMarkov, prunedByMarkov / (double)metric.numEnterCascade, + prunedByMoments, prunedByMoments / (double)metric.numEnterCascade, + metric.numAfterMomentBound, metric.numAfterMomentBound / (double)metric.numEnterCascade); + } + APLExplanation e = summ.getResults(); + System.out.format("Num results: %d\n\n", e.getResults().size()); +// System.out.println(e.prettyPrint()); + } +} \ No newline at end of file diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/RunConfig.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/RunConfig.java new file mode 100644 index 000000000..ce86dd1d4 --- /dev/null +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/RunConfig.java @@ -0,0 +1,50 @@ +package edu.stanford.futuredata.macrobase; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.util.Map; + +public class RunConfig { + private Map values; + + public RunConfig(Map values) { + this.values = values; + } + + public static RunConfig fromJsonFile(String file) throws IOException { + BufferedReader r = new BufferedReader(new FileReader(file)); + ObjectMapper mapper = new ObjectMapper(); + Map map = mapper.readValue( + r, + new TypeReference>() {} + ); + return new RunConfig(map); + } + + public static RunConfig fromJsonString(String json) throws IOException { + ObjectMapper mapper = new ObjectMapper(); + Map map = mapper.readValue( + json, + new TypeReference>() {} + ); + return new RunConfig(map); + } + + @SuppressWarnings("unchecked") + public T get(String key) { + return (T) values.get(key); + } + + @SuppressWarnings("unchecked") + public T get(String key, T defaultValue) { + return (T) values.getOrDefault(key, defaultValue); + } + + public Map getValues() { + return values; + } +} diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLMomentSummarizer.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLMomentSummarizer.java new file mode 100644 index 000000000..d210fa2f9 --- /dev/null +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLMomentSummarizer.java @@ -0,0 +1,113 @@ +package edu.stanford.futuredata.macrobase.analysis.summary.aplinear; + +import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics.*; +import edu.stanford.futuredata.macrobase.datamodel.DataFrame; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.stream.IntStream; + +/** + * Summarizer that works over both cube and row-based labeled ratio-based + * outlier summarization. + */ +public class APLMomentSummarizer extends APLSummarizer { + private Logger log = LoggerFactory.getLogger("APLMomentSummarizer"); + private String minColumn = null; + private String maxColumn = null; + private List momentColumns; + private double percentile; + private boolean useCascade = false; + + @Override + public List getAggregateNames() { + ArrayList aggregateNames = new ArrayList<>(); + aggregateNames.add("Minimum"); + aggregateNames.add("Maximum"); + for (int i = 0; i < momentColumns.size(); i++) { + aggregateNames.add("M" + i); + } + return aggregateNames; + } + + @Override + public double[][] getAggregateColumns(DataFrame input) { + double[][] aggregateColumns = new double[2+momentColumns.size()][]; + aggregateColumns[0] = input.getDoubleColumnByName(minColumn); + aggregateColumns[1] = input.getDoubleColumnByName(maxColumn); + for (int i = 0; i < momentColumns.size(); i++) { + aggregateColumns[i+2] = input.getDoubleColumnByName(momentColumns.get(i)); + } + + processCountCol(input, momentColumns.get(0), aggregateColumns[2].length); + return aggregateColumns; + } + + @Override + public Map getAggregationOps() { + Map aggregationOps = new HashMap<>(); + aggregationOps.put("add", IntStream.range(2, 2+momentColumns.size()).toArray()); + aggregationOps.put("min", new int[]{0}); + aggregationOps.put("max", new int[]{1}); + return aggregationOps; + } + + @Override + public List getQualityMetricList() { + List qualityMetricList = new ArrayList<>(); + if (useCascade) { + qualityMetricList.add( + new EstimatedSupportMetric(0, 1, 2, + (100.0 - percentile) / 100.0, 1e-5, true) + ); + } else { + qualityMetricList.add( + new EstimatedSupportMetric(0, 1, 2, + (100.0 - percentile) / 100.0, 1e-5, false) + ); + } + return qualityMetricList; + } + + @Override + public List getThresholds() { + return Arrays.asList(minOutlierSupport); + } + + @Override + public double getNumberOutliers(double[][] aggregates) { + double count = 0.0; + double[] counts = aggregates[2]; + for (int i = 0; i < counts.length; i++) { + count += counts[i]; + } + return count * percentile / 100.0; + } + + public String getMinColumn() { + return minColumn; + } + public void setMinColumn(String minColumn) { + this.minColumn = minColumn; + } + public String getMaxColumn() { + return maxColumn; + } + public void setMaxColumn(String maxColumn) { + this.maxColumn = maxColumn; + } + public List getMomentColumns() { + return momentColumns; + } + public void setMomentColumns(List momentColumns) { + this.momentColumns = momentColumns; + } + public void setPercentile(double percentile) { + this.percentile = percentile; + } + public void setCascade(boolean useCascade) { this.useCascade = useCascade; } + public double getMinRatioMetric() { + return minRatioMetric; + } +} diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLSummarizer.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLSummarizer.java index 58759b17c..03dae6d84 100644 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLSummarizer.java +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLSummarizer.java @@ -9,7 +9,9 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.Map; /** * Generic summarizer superclass that can be customized with @@ -21,7 +23,8 @@ public abstract class APLSummarizer extends BatchSummarizer { AttributeEncoder encoder; APLExplanation explanation; APrioriLinear aplKernel; - List qualityMetricList; + boolean doContainment = true; + public List qualityMetricList; List thresholds; protected long numEvents = 0; @@ -66,10 +69,12 @@ public void process(DataFrame input) throws Exception { qualityMetricList, thresholds ); + aplKernel.setDoContainment(doContainment); double[][] aggregateColumns = getAggregateColumns(input); List aggregateNames = getAggregateNames(); - List aplResults = aplKernel.explain(encoded, aggregateColumns); + Map aggregationOps = getAggregationOps(); + List aplResults = aplKernel.explain(encoded, aggregateColumns, aggregationOps); numOutliers = (long)getNumberOutliers(aggregateColumns); explanation = new APLExplanation( @@ -87,4 +92,9 @@ public APLExplanation getResults() { return explanation; } + public Map getAggregationOps() { + return null; + } + + public void setDoContainment(boolean doContainment) { this.doContainment = doContainment; } } diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java index 9f5851ce8..768b476ab 100644 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java @@ -1,5 +1,6 @@ package edu.stanford.futuredata.macrobase.analysis.summary.aplinear; +import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics.EstimatedGlobalRatioMetric; import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics.QualityMetric; import edu.stanford.futuredata.macrobase.analysis.summary.apriori.APrioriSummarizer; import edu.stanford.futuredata.macrobase.analysis.summary.apriori.IntSet; @@ -24,6 +25,7 @@ public class APrioriLinear { // **Parameters** private QualityMetric[] qualityMetrics; private double[] thresholds; + private boolean doContainment = true; // **Cached values** @@ -50,6 +52,14 @@ public APrioriLinear( public List explain( final List attributes, double[][] aggregateColumns + ) { + return explain(attributes, aggregateColumns, null); + } + + public List explain( + final List attributes, + double[][] aggregateColumns, + Map aggregationOps ) { final int numAggregates = aggregateColumns.length; final int numRows = aggregateColumns[0].length; @@ -57,11 +67,35 @@ public List explain( // Quality metrics are initialized with global aggregates to // allow them to determine the appropriate relative thresholds double[] globalAggregates = new double[numAggregates]; - for (int j = 0; j < numAggregates; j++) { - globalAggregates[j] = 0; - double[] curColumn = aggregateColumns[j]; - for (int i = 0; i < numRows; i++) { - globalAggregates[j] += curColumn[i]; + if (aggregationOps == null) { + for (int j = 0; j < numAggregates; j++) { + globalAggregates[j] = 0; + double[] curColumn = aggregateColumns[j]; + for (int i = 0; i < numRows; i++) { + globalAggregates[j] += curColumn[i]; + } + } + } else { + for (int j : aggregationOps.getOrDefault("add", new int[0])) { + globalAggregates[j] = 0; + double[] curColumn = aggregateColumns[j]; + for (int i = 0; i < numRows; i++) { + globalAggregates[j] += curColumn[i]; + } + } + for (int j : aggregationOps.getOrDefault("min", new int[0])) { + double[] curColumn = aggregateColumns[j]; + globalAggregates[j] = curColumn[0]; + for (int i = 0; i < numRows; i++) { + globalAggregates[j] = Math.min(globalAggregates[j], curColumn[i]); + } + } + for (int j : aggregationOps.getOrDefault("max", new int[0])) { + double[] curColumn = aggregateColumns[j]; + globalAggregates[j] = curColumn[0]; + for (int i = 0; i < numRows; i++) { + globalAggregates[j] = Math.max(globalAggregates[j], curColumn[i]); + } } } for (QualityMetric q : qualityMetrics) { @@ -108,10 +142,20 @@ public List explain( double[] candidateVal = thisThreadSetAggregates.get(curCandidate); if (candidateVal == null) { thisThreadSetAggregates.put(curCandidate, Arrays.copyOf(aRows[i], numAggregates)); - } else { + } else if (aggregationOps == null) { for (int a = 0; a < numAggregates; a++) { candidateVal[a] += aRows[i][a]; } + } else { + for (int a : aggregationOps.getOrDefault("add", new int[0])) { + candidateVal[a] += aRows[i][a]; + } + for (int a : aggregationOps.getOrDefault("min", new int[0])) { + candidateVal[a] = Math.min(candidateVal[a], aRows[i][a]); + } + for (int a : aggregationOps.getOrDefault("max", new int[0])) { + candidateVal[a] = Math.max(candidateVal[a], aRows[i][a]); + } } } } @@ -134,10 +178,20 @@ public List explain( double[] candidateVal = setAggregates.get(curCandidateKey); if (candidateVal == null) { setAggregates.put(curCandidateKey, Arrays.copyOf(curCandidateValue, numAggregates)); - } else { + } else if (aggregationOps == null) { for (int a = 0; a < numAggregates; a++) { candidateVal[a] += curCandidateValue[a]; } + } else { + for (int a : aggregationOps.getOrDefault("add", new int[0])) { + candidateVal[a] += curCandidateValue[a]; + } + for (int a : aggregationOps.getOrDefault("min", new int[0])) { + candidateVal[a] = Math.min(candidateVal[a], curCandidateValue[a]); + } + for (int a : aggregationOps.getOrDefault("max", new int[0])) { + candidateVal[a] = Math.max(candidateVal[a], curCandidateValue[a]); + } } } } @@ -147,25 +201,23 @@ public List explain( int pruned = 0; for (IntSet curCandidate: setAggregates.keySet()) { double[] curAggregates = setAggregates.get(curCandidate); - boolean canPassThreshold = true; - boolean isPastThreshold = true; + QualityMetric.Action action = QualityMetric.Action.KEEP; for (int i = 0; i < qualityMetrics.length; i++) { QualityMetric q = qualityMetrics[i]; double t = thresholds[i]; - canPassThreshold &= q.maxSubgroupValue(curAggregates) >= t; - isPastThreshold &= q.value(curAggregates) >= t; + action = QualityMetric.Action.combine(action, q.getAction(curAggregates, t)); } - if (canPassThreshold) { + if (action == QualityMetric.Action.KEEP) { // if a set is already past the threshold on all metrics, - // save it and no need for further exploration - if (isPastThreshold) { - curOrderSaved.add(curCandidate); - } - else { - // otherwise if a set still has potentially good subsets, - // save it for further examination + // save it and no need for further exploration if we do containment + curOrderSaved.add(curCandidate); + if (!doContainment) { curOrderNext.add(curCandidate); } + } else if (action == QualityMetric.Action.NEXT) { + // otherwise if a set still has potentially good subsets, + // save it for further examination + curOrderNext.add(curCandidate); } else { pruned++; } @@ -269,4 +321,6 @@ private ArrayList getCandidates( } return candidates; } + + public void setDoContainment(boolean doContainment) { this.doContainment = doContainment; } } diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedGlobalRatioMetric.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedGlobalRatioMetric.java new file mode 100644 index 000000000..01c5fe410 --- /dev/null +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedGlobalRatioMetric.java @@ -0,0 +1,67 @@ +package edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics; + +import sketches.MomentSketch; + +import java.util.Arrays; +import java.util.Collections; + +/** + * Measures the relative outlier rate w.r.t. the global outlier rate + */ +public class EstimatedGlobalRatioMetric implements QualityMetric{ + private int minIdx = 0; + private int maxIdx = 1; + private int momentsBaseIdx = 2; + private double baseRate = 0.0; + private double quantile; + private double cutoff; + private double globalCount; + private double tolerance = 1e-10; + + public EstimatedGlobalRatioMetric(int minIdx, int maxIdx, int momentsBaseIdx, + double quantile, double tolerance) { + this.minIdx = minIdx; + this.maxIdx = maxIdx; + this.momentsBaseIdx = momentsBaseIdx; + this.quantile = quantile; + this.tolerance = tolerance; + } + + @Override + public String name() { + return "est_global_ratio"; + } + + @Override + public QualityMetric initialize(double[] globalAggregates) { + globalCount = globalAggregates[momentsBaseIdx]; + MomentSketch ms = new MomentSketch(tolerance); + double[] powerSums = Arrays.copyOfRange(globalAggregates, momentsBaseIdx, globalAggregates.length); + ms.setStats(powerSums, globalAggregates[minIdx], globalAggregates[maxIdx]); + try { + cutoff = ms.getQuantiles(Collections.singletonList(quantile))[0]; + } catch (Exception e) { + cutoff = quantile * (globalAggregates[maxIdx] - globalAggregates[minIdx]) + globalAggregates[minIdx]; + } + baseRate = 1.0 - quantile; + return this; + } + + @Override + public double value(double[] aggregates) { + MomentSketch ms = new MomentSketch(tolerance); + double[] powerSums = Arrays.copyOfRange(aggregates, momentsBaseIdx, aggregates.length); + ms.setStats(powerSums, aggregates[minIdx], aggregates[maxIdx]); + return ms.estimateGreaterThanThreshold(cutoff) / baseRate; + } + + @Override + public boolean canPassThreshold(double[] aggregates, double threshold) { + return true; + } + + @Override + public boolean isMonotonic() { + return false; + } +} diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedSupportMetric.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedSupportMetric.java new file mode 100644 index 000000000..bd560d57f --- /dev/null +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedSupportMetric.java @@ -0,0 +1,133 @@ +package edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics; + +import sketches.MomentSketch; + +import java.util.Arrays; +import java.util.Collections; + +/** + * Measures the relative outlier rate w.r.t. the global outlier rate + */ +public class EstimatedSupportMetric implements QualityMetric{ + private int minIdx = 0; + private int maxIdx = 1; + private int momentsBaseIdx = 2; + private double quantile; // eg, 0.99 + private double cutoff; + private double globalCount; + private double tolerance = 1e-10; + private boolean useCascade = true; + + // Statistics + public int numEnterCascade = 0; + public int numAfterNaiveCheck = 0; + public int numAfterMarkovBound = 0; + public int numAfterMomentBound = 0; + + public EstimatedSupportMetric(int minIdx, int maxIdx, int momentsBaseIdx, double quantile, + double tolerance, boolean useCascade) { + this.minIdx = minIdx; + this.maxIdx = maxIdx; + this.momentsBaseIdx = momentsBaseIdx; + this.quantile = quantile; + this.tolerance = tolerance; + this.useCascade = useCascade; + } + + @Override + public String name() { + return "est_support"; + } + + @Override + public QualityMetric initialize(double[] globalAggregates) { + globalCount = globalAggregates[momentsBaseIdx] * (1.0 - quantile); + MomentSketch ms = new MomentSketch(tolerance); + double[] powerSums = Arrays.copyOfRange(globalAggregates, momentsBaseIdx, globalAggregates.length); + ms.setStats(powerSums, globalAggregates[0], globalAggregates[1]); + try { + cutoff = ms.getQuantiles(Collections.singletonList(quantile))[0]; + } catch (Exception e) { + cutoff = quantile * (globalAggregates[maxIdx] - globalAggregates[minIdx]) + globalAggregates[minIdx]; + } + return this; + } + + @Override + public double value(double[] aggregates) { + MomentSketch ms = new MomentSketch(tolerance); + double[] powerSums = Arrays.copyOfRange(aggregates, momentsBaseIdx, aggregates.length); + ms.setStats(powerSums, aggregates[minIdx], aggregates[maxIdx]); + return ms.estimateGreaterThanThreshold(cutoff) * aggregates[momentsBaseIdx] / globalCount; + } + + @Override + public Action getAction(double[] aggregates, double threshold) { + if (useCascade) { + return getActionCascade(aggregates, threshold); + } else { + return getActionMaxent(aggregates, threshold); + } + } + + private Action getActionCascade(double[] aggregates, double threshold) { + numEnterCascade++; + + // Simple checks on min and max + if (aggregates[maxIdx] < cutoff) { + return Action.PRUNE; + } + if (aggregates[minIdx] >= cutoff) { + return Action.KEEP; + } + numAfterNaiveCheck++; + + // Markov bounds + double outlierRateNeeded = threshold * globalCount / aggregates[momentsBaseIdx]; + double mean = aggregates[momentsBaseIdx+1] / aggregates[momentsBaseIdx]; + double min = aggregates[minIdx]; + double max = aggregates[maxIdx]; + double cutoffLowerBound = Math.max(0.0, 1 - (mean - min) / (cutoff - min)); + double cutoffUpperBound = Math.min(1.0, (max - mean) / (max - cutoff)); + double outlierRateUpperBound = 1.0 - cutoffLowerBound; + double outlierRateLowerBound = 1.0 - cutoffUpperBound; + if (outlierRateUpperBound < outlierRateNeeded) { + return Action.PRUNE; + } + if (outlierRateLowerBound >= outlierRateNeeded) { + return Action.KEEP; + } + numAfterMarkovBound++; + + // Moments-based bounds + MomentSketch ms = new MomentSketch(tolerance); + double[] powerSums = Arrays.copyOfRange(aggregates, momentsBaseIdx, aggregates.length); + ms.setStats(powerSums, aggregates[0], aggregates[1]); + double[] bounds = ms.boundGreaterThanThreshold(cutoff); + if (bounds[1] < outlierRateNeeded) { + return Action.PRUNE; + } + if (bounds[0] >= outlierRateNeeded) { + return Action.KEEP; + } + numAfterMomentBound++; + + // Maxent estimate + double outlierRateEstimate = ms.estimateGreaterThanThreshold(cutoff); + return (outlierRateEstimate >= outlierRateNeeded) ? Action.KEEP : Action.PRUNE; + } + + private Action getActionMaxent(double[] aggregates, double threshold) { + double outlierRateNeeded = threshold * globalCount / aggregates[momentsBaseIdx]; + MomentSketch ms = new MomentSketch(tolerance); + double[] powerSums = Arrays.copyOfRange(aggregates, momentsBaseIdx, aggregates.length); + ms.setStats(powerSums, aggregates[minIdx], aggregates[maxIdx]); + double outlierRateEstimate = ms.estimateGreaterThanThreshold(cutoff); + return (outlierRateEstimate >= outlierRateNeeded) ? Action.KEEP : Action.PRUNE; + } + + @Override + public boolean isMonotonic() { + return true; + } +} diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/QualityMetric.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/QualityMetric.java index 4753208e7..a66a366e4 100644 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/QualityMetric.java +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/QualityMetric.java @@ -10,6 +10,26 @@ public interface QualityMetric { double value(double[] aggregates); boolean isMonotonic(); + enum Action { + KEEP(2), + NEXT(1), + PRUNE(0); + + private int val; + + Action(int val) { + this.val = val; + } + + public static Action combine(Action a, Action b) { + if (a.val <= b.val) { + return a; + } else { + return b; + } + } + } + // can override for more fancy tight quality metric bounds default double maxSubgroupValue(double[] aggregates) { if (isMonotonic()) { @@ -18,4 +38,22 @@ default double maxSubgroupValue(double[] aggregates) { return Double.POSITIVE_INFINITY; } } + + default Action getAction(double[] aggregates, double threshold) { + if (isPastThreshold(aggregates, threshold)) { + return Action.KEEP; + } else if (canPassThreshold(aggregates, threshold)) { + return Action.NEXT; + } else { + return Action.PRUNE; + } + } + + default boolean isPastThreshold(double[] aggregates, double threshold) { + return value(aggregates) >= threshold; + } + + default boolean canPassThreshold(double[] aggregates, double threshold) { + return maxSubgroupValue(aggregates) >= threshold; + } } diff --git a/lib/src/main/resources/log4j.properties b/lib/src/main/resources/log4j.properties new file mode 100644 index 000000000..dc217f5e4 --- /dev/null +++ b/lib/src/main/resources/log4j.properties @@ -0,0 +1,8 @@ +log4j.rootLogger=INFO, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n \ No newline at end of file From 00960ca9ef05610d24eab267c597e861bde02ad8 Mon Sep 17 00:00:00 2001 From: Jialin Ding Date: Mon, 5 Feb 2018 10:50:53 -0800 Subject: [PATCH 2/7] small changes --- Milan Moments Creation.ipynb | 2070 -------- Moments Cube Creation.ipynb | 4234 +++++++++++++++++ ...momentBench.json => momentBenchMilan.json} | 6 +- lib/momentBenchWiki.json | 16 + .../macrobase/APLMomentSummarizerBench.java | 16 +- .../aplinear/APLOutlierSummarizer.java | 17 +- .../metrics/EstimatedSupportMetric.java | 4 +- 7 files changed, 4279 insertions(+), 2084 deletions(-) delete mode 100644 Milan Moments Creation.ipynb create mode 100644 Moments Cube Creation.ipynb rename lib/{momentBench.json => momentBenchMilan.json} (79%) create mode 100644 lib/momentBenchWiki.json diff --git a/Milan Moments Creation.ipynb b/Milan Moments Creation.ipynb deleted file mode 100644 index fe2e6b733..000000000 --- a/Milan Moments Creation.ipynb +++ /dev/null @@ -1,2070 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 51, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "data = pd.read_csv('~/Downloads/sms-call-internet-mi-2013-11-01 2.txt', sep='\\t',\n", - " header=None, names = [\"Grid\", \"Time\", \"Country\", \"SMSin\", \"SMSout\",\n", - " \"Callin\", \"Callout\", \"Internet\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAD9CAYAAABA8iukAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEyFJREFUeJzt3WGMpdV93/Hvz1AcksYGEndKd1FByjYV9qqtPQIqV9U0\npLBA1CWVbWGhsLjUW8ngptVKzbp9QWViiVRJqakc1I3ZGqI0hNJErMI62w32KOoLMDi2vF6IzQiv\nw46wSbwYd2vF1pp/X9yz68t6Zufs3Jm9d2a+H+lqn+c857n33KO785tznnOfSVUhSVKPN427AZKk\ntcPQkCR1MzQkSd0MDUlSN0NDktTN0JAkdVsyNJLsTfJKki8PlV2S5GCSF9q/F7fyJLk/yVySLyV5\n59A5O1r9F5LsGCp/V5JD7Zz7k+RMryFJGp+ekcangG2nle0GnqyqLcCTbR/gBmBLe+wEHoBBAAB3\nA1cDVwF3D4XAA8AHh87btsRrSJLGZMnQqKo/AY6dVrwdeKhtPwTcPFT+cA08BVyU5FLgeuBgVR2r\nqleBg8C2duwtVfVUDb5l+PBpz7XQa0iSxmS51zSmqurltv0NYKptbwJeGqp3tJWdqfzoAuVneg1J\n0picP+oTVFUlWdV7kSz1Gkl2MpgO48ILL3zXZZddtprNGbvXX3+dN73JNQwLsW8WZ98szr6Br371\nq39ZVW9bqt5yQ+ObSS6tqpfbFNMrrXweGP6JvbmVzQMzp5XPtvLNC9Q/02v8iKraA+wBmJ6ermef\nfXaZb2ttmJ2dZWZmZtzNmEj2zeLsm8XZN5Dk6z31lhut+4CTK6B2AI8Pld/WVlFdA7zWppgOANcl\nubhdAL8OONCOfSfJNW3V1G2nPddCryFJGpMlRxpJfpfBKOGnkxxlsArqXuDRJHcAXwfe16rvB24E\n5oDvAh8AqKpjSe4Bnmn1PlpVJy+uf4jBCq0LgU+3B2d4DUnSmCwZGlX1/kUOXbtA3QLuXOR59gJ7\nFyh/FnjHAuXfWug1JEnjs7Gv/EiSzoqhIUnqZmhIkroZGpKkboaGJKmboSFJ6jbybUQk6Vy6fPcT\np7aP3HvTGFuyMRkakibecFBovJyekiR1MzQkSd0MDUlSN0NDktTN0JAkdTM0JEndXHIraSK5zHYy\nOdKQJHUzNCRJ3ZyekjQxznZKyluKnHuONCRJ3QwNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN0NDktTN\n0JAkdTM0JEnd/Ea4pLHyxoRriyMNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN0NDktTNJbeS1oXTl+76\nR5lWx0gjjST/NsnhJF9O8rtJfizJFUmeTjKX5PeSXNDqvrntz7Xjlw89z0da+VeSXD9Uvq2VzSXZ\nPUpbJUmjW3ZoJNkE/GtguqreAZwH3AL8GnBfVf0M8CpwRzvlDuDVVn5fq0eSK9t5bwe2Ab+Z5Lwk\n5wGfAG4ArgTe3+pKksZk1Gsa5wMXJjkf+HHgZeDngMfa8YeAm9v29rZPO35tkrTyR6rqe1X1NWAO\nuKo95qrqxar6PvBIqytJGpNlh0ZVzQO/Dvw5g7B4Dfg88O2qOtGqHQU2te1NwEvt3BOt/k8Nl592\nzmLlkqQxWfaF8CQXM/jN/wrg28D/ZDC9dM4l2QnsBJiammJ2dnYczThnjh8/vu7f43LZN4ub1L7Z\ntfXE0pWW4Wze66T2zSQaZfXUzwNfq6q/AEjy+8C7gYuSnN9GE5uB+VZ/HrgMONqms94KfGuo/KTh\ncxYrf4Oq2gPsAZienq6ZmZkR3tbkm52dZb2/x+WybxY3qX1z+yrdsPDIrTPddSe1bybRKNc0/hy4\nJsmPt2sT1wLPAZ8F3tPq7AAeb9v72j7t+Geqqlr5LW111RXAFuBzwDPAlrYa6wIGF8v3jdBeSdKI\nlj3SqKqnkzwG/ClwAvgCg9/2nwAeSfKrrezBdsqDwG8nmQOOMQgBqupwkkcZBM4J4M6q+gFAkruA\nAwxWZu2tqsPLba8kaXQjfbmvqu4G7j6t+EUGK59Or/tXwHsXeZ6PAR9boHw/sH+UNkqaPP4NjbXL\n24hIkroZGpKkboaGJKmboSFJ6mZoSJK6GRqSpG6GhiSpm6EhSepmaEiSuhkakqRuhoYkqZuhIUnq\nZmhIkrqNdJdbSerlnW3XB0cakqRuhoYkqZuhIUnqZmhIkrp5IVzSujR84f3IvTeNsSXriyMNSVI3\nQ0OS1M3QkCR1MzQkSd0MDUlSN0NDktTN0JAkdTM0JEnd/HKfpFXjnW3XH0cakqRuhoYkqZuhIUnq\nZmhIkroZGpKkboaGJKnbSKGR5KIkjyX5syTPJ/mHSS5JcjDJC+3fi1vdJLk/yVySLyV559Dz7Gj1\nX0iyY6j8XUkOtXPuT5JR2itJGs2oI42PA39UVX8X+HvA88Bu4Mmq2gI82fYBbgC2tMdO4AGAJJcA\ndwNXA1cBd58Mmlbng0PnbRuxvdLYXL77iVMPaa1admgkeSvwj4EHAarq+1X1bWA78FCr9hBwc9ve\nDjxcA08BFyW5FLgeOFhVx6rqVeAgsK0de0tVPVVVBTw89FySpDEYZaRxBfAXwH9P8oUkn0zyE8BU\nVb3c6nwDmGrbm4CXhs4/2srOVH50gXJJ0piMchuR84F3Ah+uqqeTfJwfTkUBUFWVpEZpYI8kOxlM\neTE1NcXs7Oxqv+RYHT9+fN2/x+WahL45NP/aqe2tm956anvX1hOntsfRxnH0zfB7Hqel3vckfG7W\nilFC4yhwtKqebvuPMQiNbya5tKpeblNMr7Tj88BlQ+dvbmXzwMxp5bOtfPMC9X9EVe0B9gBMT0/X\nzMzMQtXWjdnZWdb7e1yuSeib24euWRy5dWbJ8nNlHH1z+4Rcv1mqvyfhc7NWLHt6qqq+AbyU5Gdb\n0bXAc8A+4OQKqB3A4217H3BbW0V1DfBam8Y6AFyX5OJ2Afw64EA79p0k17RVU7cNPZckaQxGvcvt\nh4HfSXIB8CLwAQZB9GiSO4CvA+9rdfcDNwJzwHdbXarqWJJ7gGdavY9W1bG2/SHgU8CFwKfbQ5I0\nJiOFRlV9EZhe4NC1C9Qt4M5FnmcvsHeB8meBd4zSRknSyvEb4ZKkboaGJKmboSFJ6uafe5W0orxN\nyvpmaEha94aD7Mi9N42xJWuf01OSpG6GhiSpm9NT0gpwHl8bhSMNSVI3Q0OS1M3QkCR1MzQkSd0M\nDUlSN0NDktTN0JAkdTM0JEnd/HKftIr80p/WG0cakqRujjQkjcwR1cbhSEOS1M3QkCR1MzQkSd0M\nDUlSNy+ES8vkxV9tRIaGdBYMCm10hoakDWU4+I/ce9MYW7I2eU1DktTN0JAkdTM0JEndDA1JUjdD\nQ5LUzdVTkpbF5ccbkyMNSVI3RxrSGPhdAa1VjjQkSd1GDo0k5yX5QpI/bPtXJHk6yVyS30tyQSt/\nc9ufa8cvH3qOj7TyryS5fqh8WyubS7J71LZKkkazEiONXwaeH9r/NeC+qvoZ4FXgjlZ+B/BqK7+v\n1SPJlcAtwNuBbcBvtiA6D/gEcANwJfD+VldaVy7f/cSphzTpRgqNJJuBm4BPtv0APwc81qo8BNzc\ntre3fdrxa1v97cAjVfW9qvoaMAdc1R5zVfViVX0feKTVldYtA0STbtSRxn8B/h3wetv/KeDbVXWi\n7R8FNrXtTcBLAO34a63+qfLTzlmsXJI0JstePZXkF4BXqurzSWZWrknLastOYCfA1NQUs7Oz42zO\nqjt+/Pi6f4/Ltdp9s2vriaUrrZCVfh8r3Tfnsi9Wy8n+8P9Uv1GW3L4b+GdJbgR+DHgL8HHgoiTn\nt9HEZmC+1Z8HLgOOJjkfeCvwraHyk4bPWaz8DapqD7AHYHp6umZmZkZ4W5NvdnaW9f4el2u1++b2\nczhtdOTWmRV9vpXum3PZF6vm0P8DYNfWH/DhX5gZb1vWiGVPT1XVR6pqc1VdzuBC9meq6lbgs8B7\nWrUdwONte1/bpx3/TFVVK7+lra66AtgCfA54BtjSVmNd0F5j33LbK0ka3Wp8ue9XgEeS/CrwBeDB\nVv4g8NtJ5oBjDEKAqjqc5FHgOeAEcGdV/QAgyV3AAeA8YG9VHV6F9kqSOq1IaFTVLDDbtl9ksPLp\n9Dp/Bbx3kfM/BnxsgfL9wP6VaKMkaXR+I1yS1M3QkCR1MzQkSd0MDUlSN2+NLqmbtzeRIw1JUjdD\nQ5LUzdCQJHUzNCRJ3bwQLjX+3W5paY40JEndHGloQ3MJqXR2DA1JwunJXk5PSZK6GRqSpG5OT0k6\nI6/7aJihIU0o59g1iQwNaQH+wJYW5jUNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN1dPSdJpXD23OEca\nkqRujjSkJfiNaOmHHGlIkro50pD0IxxdaTGONCRJ3QwNSVI3Q0OS1M1rGtIa4PcGNCkcaUiSuhka\nkqRuyw6NJJcl+WyS55IcTvLLrfySJAeTvND+vbiVJ8n9SeaSfCnJO4eea0er/0KSHUPl70pyqJ1z\nf5KM8mal9eDy3U+84SGdS6Nc0zgB7KqqP03yk8DnkxwEbgeerKp7k+wGdgO/AtwAbGmPq4EHgKuT\nXALcDUwD1Z5nX1W92up8EHga2A9sAz49Qpu1Qa3nawLr+b1p8iw7NKrqZeDltv1/kzwPbAK2AzOt\n2kPALIPQ2A48XFUFPJXkoiSXtroHq+oYQAuebUlmgbdU1VOt/GHgZgwNjcjfznU2DOU3WpFrGkku\nB/4BgxHBVAsUgG8AU217E/DS0GlHW9mZyo8uUC5JGpORl9wm+evA/wL+TVV9Z/iyQ1VVkhr1NTra\nsBPYCTA1NcXs7Oxqv+RYHT9+fN2/x+VarG92bT1x7hszBmf6XCz1uTk0/9qp7V1bV7BRa8DUhX2f\nEf/fjRgaSf4ag8D4nar6/Vb8zSSXVtXLbfrplVY+D1w2dPrmVjbPD6ezTpbPtvLNC9T/EVW1B9gD\nMD09XTMzMwtVWzdmZ2dZ7+9xuRbrm9s3yJTUkVtnFj221Odmo/TRQnZtPcFvHFr6x+GZ+nejGGX1\nVIAHgeer6j8PHdoHnFwBtQN4fKj8traK6hrgtTaNdQC4LsnFbaXVdcCBduw7Sa5pr3Xb0HNJksZg\nlJHGu4FfAg4l+WIr+/fAvcCjSe4Avg68rx3bD9wIzAHfBT4AUFXHktwDPNPqffTkRXHgQ8CngAsZ\nXAD3Irh0Bl601WobZfXU/wEW+97EtQvUL+DORZ5rL7B3gfJngXcst43a2FwlJa08vxEuSepmaEiS\nuhkakqRu3hpd2sC87qOzZWhI69TpgbBr6wlu3/2Eq6o0EkNDkjq5pNnQkDYcp6Q0CkND68qh+dc2\n9O0wpNXm6ilJUjdDQ5LUzdCQJHXzmobWvOELuxvt70BI55ojDUlSN0NDktTN0JAkdTM0JEndDA1J\nUjdXT2lN8lYY0ngYGpK0DBv15oWGhtYMRxfS+BkamliGhDR5DA1NFINCmmyunpIkdTM0JEndnJ7S\nWDgNJa1NjjQkSd0MDUlSN6enJGlEG+mLfoaGzhmvY0hrn6GhVWVQSOuLoaEVZ1BI65ehoWXbSPO4\nkgYMDUlaQev9lylDQ2dlsaknp6SkjcHQ0IIMAUkLmfjQSLIN+DhwHvDJqrp3zE1atwwKaWWtx6mq\niQ6NJOcBnwD+KXAUeCbJvqp6brwtWz8MCklnY6JDA7gKmKuqFwGSPAJsBzZkaJz8Ab9r6wlmFiiX\nNLnWy6hj0kNjE/DS0P5R4OoxtWVFnekH/fAHygvP0vqzlgMkVTXuNiwqyXuAbVX1L9v+LwFXV9Vd\np9XbCexsuz8LfOWcNvTc+2ngL8fdiAll3yzOvlmcfQN/u6retlSlSR9pzAOXDe1vbmVvUFV7gD3n\nqlHjluTZqpoedzsmkX2zOPtmcfZNv0m/NfozwJYkVyS5ALgF2DfmNknShjXRI42qOpHkLuAAgyW3\ne6vq8JibJUkb1kSHBkBV7Qf2j7sdE2bDTMUtg32zOPtmcfZNp4m+EC5JmiyTfk1DkjRBDI01JMl7\nkxxO8nqS6dOOfSTJXJKvJLl+XG2cBEn+Y5L5JF9sjxvH3aZxSrKtfS7mkuwed3smSZIjSQ61z8mz\n427PWjDx1zT0Bl8G/jnw34YLk1zJYGXZ24G/Bfxxkr9TVT84902cGPdV1a+PuxHj5q14uvyTqtro\n39Ho5khjDamq56tqoS8ubgceqarvVdXXgDkGt2CRTt2Kp6q+D5y8FY+0LIbG+rDQ7VY2jaktk+Ku\nJF9KsjfJxeNuzBj52TizAv53ks+3O0toCU5PTZgkfwz8zQUO/Yeqevxct2dSnamfgAeAexj8QLgH\n+A3gX5y71mkN+UdVNZ/kbwAHk/xZVf3JuBs1yQyNCVNVP7+M07put7Ke9PZTkt8C/nCVmzPJNtxn\n42xU1Xz795Ukf8BgOs/QOAOnp9aHfcAtSd6c5ApgC/C5MbdpbJJcOrT7iwwWEGxU3opnEUl+IslP\nntwGrmNjf1a6ONJYQ5L8IvBfgbcBTyT5YlVdX1WHkzzK4O+MnADu3OArp/5Tkr/PYHrqCPCvxtuc\n8fFWPGc0BfxBEhj8LPwfVfVH423S5PMb4ZKkbk5PSZK6GRqSpG6GhiSpm6EhSepmaEiSuhkakqRu\nhoYkqZuhIUnq9v8BYuvezWR1bPsAAAAASUVORK5CYII=\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "np.log(data[\"Internet\"]).hist(bins=100)" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [], - "source": [ - "metric = \"Internet\"\n", - "data = data[[\"Grid\", \"Country\", metric]]\n", - "data = data[np.isfinite(data[metric])]\n", - "# data = data.head(1000000)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
GridCountryminmaxm0m1m2m3m4m5m6m7m8
01320.0017870.0017871.00.0017873.194477e-065.709522e-091.020469e-111.823894e-143.259864e-175.826388e-201.041356e-22
11330.0261370.0261375.00.1306873.415825e-038.928086e-052.333572e-066.099355e-081.594214e-094.166866e-111.089111e-12
21394.65279119.834697144.01506.5654501.733675e+042.143301e+052.798359e+063.819016e+075.417785e+087.966347e+091.211974e+11
31460.0017870.02613714.00.2685236.844427e-031.785846e-044.667184e-061.219872e-073.188429e-098.333732e-112.178223e-12
41490.0273000.0273003.00.0819012.235946e-036.104237e-051.666485e-064.549582e-081.242057e-093.390873e-119.257241e-13
52320.0009220.0009221.00.0009228.492699e-077.826518e-107.212594e-136.646827e-166.125439e-195.644950e-225.202151e-25
62330.0273560.0273565.00.1367803.741749e-031.023592e-042.800137e-067.660049e-082.095482e-095.732396e-111.568153e-12
72394.65420519.886492144.01512.3629081.747867e+042.170730e+052.847356e+063.903768e+075.562676e+088.213929e+091.254549e+11
82460.0009220.02735614.00.2772467.486895e-032.047216e-045.600277e-061.532010e-074.190964e-091.146479e-103.136307e-12
92490.0273000.0273003.00.0819012.235946e-036.104237e-051.666485e-064.549582e-081.242057e-093.390873e-119.257241e-13
103330.0286530.0286535.00.1432654.104998e-031.176209e-043.370201e-069.656667e-082.766933e-097.928119e-112.271651e-12
113394.65571019.941626144.01518.5340731.763052e+042.200233e+052.900338e+063.995898e+075.720999e+088.485816e+091.301526e+11
123460.0286530.02865310.00.2865318.209995e-032.352417e-046.740402e-061.931333e-075.533867e-091.585624e-104.543302e-12
133490.0273000.0273003.00.0819012.235946e-036.104237e-051.666485e-064.549582e-081.242057e-093.390873e-119.257241e-13
144320.0042950.0042951.00.0042951.844681e-057.922861e-083.402850e-101.461516e-126.277173e-152.696031e-171.157939e-19
154330.0226080.0226085.00.1130392.555574e-035.777604e-051.306192e-062.953019e-086.676141e-101.509332e-113.412274e-13
164394.64869619.684672144.01489.7730061.692965e+042.065406e+052.660609e+063.583104e+075.018357e+087.290160e+091.096720e+11
174460.0042950.02260814.00.2432585.184936e-031.158690e-042.613745e-065.906622e-081.335253e-093.018674e-116.824554e-13
184490.0273000.0273003.00.0819012.235946e-036.104237e-051.666485e-064.549582e-081.242057e-093.390873e-119.257241e-13
195320.0049210.0049211.00.0049212.421239e-051.191397e-075.862398e-102.884657e-121.419427e-146.984444e-173.436772e-19
205330.0217270.0217275.00.1086362.360369e-035.128436e-051.114269e-062.421001e-085.260172e-101.142891e-112.483190e-13
215394.24979717.594501144.01356.4280541.401935e+041.553800e+051.816012e+062.215515e+072.805796e+083.677848e+094.980961e+10
225460.0049210.02172714.00.2369554.817588e-031.030453e-042.230882e-064.843155e-081.052091e-092.285810e-114.966393e-13
235490.0236550.0236553.00.0709641.678625e-033.970727e-059.392609e-072.221787e-085.255557e-101.243183e-112.940704e-13
246330.0286530.0286535.00.1432654.104998e-031.176209e-043.370201e-069.656667e-082.766933e-097.928119e-112.271651e-12
256394.65571019.941626144.01518.5340731.763052e+042.200233e+052.900338e+063.995898e+075.720999e+088.485816e+091.301526e+11
266460.0286530.02865310.00.2865318.209995e-032.352417e-046.740402e-061.931333e-075.533867e-091.585624e-104.543302e-12
276490.0273000.0273003.00.0819012.235946e-036.104237e-051.666485e-064.549582e-081.242057e-093.390873e-119.257241e-13
287330.0286530.0286535.00.1432654.104998e-031.176209e-043.370201e-069.656667e-082.766933e-097.928119e-112.271651e-12
297394.65571019.941626144.01518.5340731.763052e+042.200233e+052.900338e+063.995898e+075.720999e+088.485816e+091.301526e+11
..........................................
1325339997860.0859950.0859951.00.0859957.395101e-036.359400e-045.468752e-054.702841e-064.044197e-073.477798e-082.990725e-09
1325349997882390.0859950.0859952.00.1719901.479020e-021.271880e-031.093750e-049.405681e-068.088394e-076.955596e-085.981449e-09
1325359998330.0859950.0859951.00.0859957.395101e-036.359400e-045.468752e-054.702841e-064.044197e-073.477798e-082.990725e-09
1325369998340.0859950.0859952.00.1719901.479020e-021.271880e-031.093750e-049.405681e-068.088394e-076.955596e-085.981449e-09
13253799983919.02167181.737160144.05837.4915632.642833e+051.320879e+077.187426e+084.198497e+102.598923e+121.685744e+141.135115e+16
1325389998410.0859950.0859951.00.0859957.395101e-036.359400e-045.468752e-054.702841e-064.044197e-073.477798e-082.990725e-09
1325399998460.0859950.0859954.00.3439792.958040e-022.543760e-032.187501e-041.881136e-051.617679e-061.391119e-071.196290e-08
1325409998490.0859950.0859956.00.5159694.437061e-023.815640e-033.281251e-042.821704e-052.426518e-062.086679e-071.794435e-08
1325419998860.0859950.0859951.00.0859957.395101e-036.359400e-045.468752e-054.702841e-064.044197e-073.477798e-082.990725e-09
1325429998882390.0859950.0859952.00.1719901.479020e-021.271880e-031.093750e-049.405681e-068.088394e-076.955596e-085.981449e-09
1325439999330.0859950.2663014.00.5298309.407055e-022.091992e-025.207982e-031.354985e-033.580292e-049.509695e-052.530273e-05
1325449999340.0147080.1747624.00.3642314.603269e-026.676090e-031.049617e-031.732318e-042.938279e-055.057017e-068.769517e-07
13254599993910.34521363.231768144.03857.6627391.215812e+054.409394e+061.793299e+087.978308e+093.801183e+111.907161e+139.951139e+14
1325469999410.0859950.0887674.00.3522963.103386e-022.734283e-032.409512e-042.123691e-051.872100e-061.650596e-071.455544e-08
1325479999430.0887670.0887671.00.0887677.879586e-036.994475e-046.208788e-055.511357e-064.892268e-074.342721e-083.854905e-09
1325489999440.0887670.0887671.00.0887677.879586e-036.994475e-046.208788e-055.511357e-064.892268e-074.342721e-083.854905e-09
1325499999460.0147080.0859958.00.4028093.044565e-022.556486e-032.189372e-041.881412e-051.617719e-061.391125e-071.196291e-08
1325509999490.0147080.0859959.00.5600914.501954e-023.825184e-033.282655e-042.821911e-052.426549e-062.086683e-071.794435e-08
1325519999860.0859950.0859951.00.0859957.395101e-036.359400e-045.468752e-054.702841e-064.044197e-073.477798e-082.990725e-09
1325529999882390.0859950.0859952.00.1719901.479020e-021.271880e-031.093750e-049.405681e-068.088394e-076.955596e-085.981449e-09
13255310000330.0859950.6247364.01.1272214.844223e-012.625288e-011.561461e-019.595410e-025.961722e-023.717687e-022.321156e-02
13255410000340.0542310.2942404.00.6427111.402795e-013.530072e-029.439573e-032.602313e-037.309347e-042.079667e-045.972406e-05
13255510000398.17025572.936852144.03038.5935347.865887e+042.549735e+061.037753e+085.133570e+092.919976e+111.813755e+131.187581e+15
13255610000410.0859950.2082454.00.7107311.374934e-012.772831e-025.696546e-031.179593e-032.450699e-045.098521e-051.061318e-05
13255710000430.2082450.2082451.00.2082454.336611e-029.030789e-031.880620e-033.916302e-048.155516e-051.698348e-053.536730e-06
13255810000440.2082450.2082451.00.2082454.336611e-029.030789e-031.880620e-033.916302e-048.155516e-051.698348e-053.536730e-06
13255910000460.0542310.0859958.00.5609044.134455e-023.181745e-032.533488e-042.068770e-051.719435e-061.446303e-071.226217e-08
13256010000490.0542310.0859959.00.6786635.319371e-024.294129e-033.540742e-042.962430e-052.502835e-062.128067e-071.816880e-08
13256110000860.0859950.0859951.00.0859957.395101e-036.359400e-045.468752e-054.702841e-064.044197e-073.477798e-082.990725e-09
13256210000882390.0859950.0859952.00.1719901.479020e-021.271880e-031.093750e-049.405681e-068.088394e-076.955596e-085.981449e-09
\n", - "

132563 rows × 13 columns

\n", - "
" - ], - "text/plain": [ - " Grid Country min max m0 m1 \\\n", - "0 1 32 0.001787 0.001787 1.0 0.001787 \n", - "1 1 33 0.026137 0.026137 5.0 0.130687 \n", - "2 1 39 4.652791 19.834697 144.0 1506.565450 \n", - "3 1 46 0.001787 0.026137 14.0 0.268523 \n", - "4 1 49 0.027300 0.027300 3.0 0.081901 \n", - "5 2 32 0.000922 0.000922 1.0 0.000922 \n", - "6 2 33 0.027356 0.027356 5.0 0.136780 \n", - "7 2 39 4.654205 19.886492 144.0 1512.362908 \n", - "8 2 46 0.000922 0.027356 14.0 0.277246 \n", - "9 2 49 0.027300 0.027300 3.0 0.081901 \n", - "10 3 33 0.028653 0.028653 5.0 0.143265 \n", - "11 3 39 4.655710 19.941626 144.0 1518.534073 \n", - "12 3 46 0.028653 0.028653 10.0 0.286531 \n", - "13 3 49 0.027300 0.027300 3.0 0.081901 \n", - "14 4 32 0.004295 0.004295 1.0 0.004295 \n", - "15 4 33 0.022608 0.022608 5.0 0.113039 \n", - "16 4 39 4.648696 19.684672 144.0 1489.773006 \n", - "17 4 46 0.004295 0.022608 14.0 0.243258 \n", - "18 4 49 0.027300 0.027300 3.0 0.081901 \n", - "19 5 32 0.004921 0.004921 1.0 0.004921 \n", - "20 5 33 0.021727 0.021727 5.0 0.108636 \n", - "21 5 39 4.249797 17.594501 144.0 1356.428054 \n", - "22 5 46 0.004921 0.021727 14.0 0.236955 \n", - "23 5 49 0.023655 0.023655 3.0 0.070964 \n", - "24 6 33 0.028653 0.028653 5.0 0.143265 \n", - "25 6 39 4.655710 19.941626 144.0 1518.534073 \n", - "26 6 46 0.028653 0.028653 10.0 0.286531 \n", - "27 6 49 0.027300 0.027300 3.0 0.081901 \n", - "28 7 33 0.028653 0.028653 5.0 0.143265 \n", - "29 7 39 4.655710 19.941626 144.0 1518.534073 \n", - "... ... ... ... ... ... ... \n", - "132533 9997 86 0.085995 0.085995 1.0 0.085995 \n", - "132534 9997 88239 0.085995 0.085995 2.0 0.171990 \n", - "132535 9998 33 0.085995 0.085995 1.0 0.085995 \n", - "132536 9998 34 0.085995 0.085995 2.0 0.171990 \n", - "132537 9998 39 19.021671 81.737160 144.0 5837.491563 \n", - "132538 9998 41 0.085995 0.085995 1.0 0.085995 \n", - "132539 9998 46 0.085995 0.085995 4.0 0.343979 \n", - "132540 9998 49 0.085995 0.085995 6.0 0.515969 \n", - "132541 9998 86 0.085995 0.085995 1.0 0.085995 \n", - "132542 9998 88239 0.085995 0.085995 2.0 0.171990 \n", - "132543 9999 33 0.085995 0.266301 4.0 0.529830 \n", - "132544 9999 34 0.014708 0.174762 4.0 0.364231 \n", - "132545 9999 39 10.345213 63.231768 144.0 3857.662739 \n", - "132546 9999 41 0.085995 0.088767 4.0 0.352296 \n", - "132547 9999 43 0.088767 0.088767 1.0 0.088767 \n", - "132548 9999 44 0.088767 0.088767 1.0 0.088767 \n", - "132549 9999 46 0.014708 0.085995 8.0 0.402809 \n", - "132550 9999 49 0.014708 0.085995 9.0 0.560091 \n", - "132551 9999 86 0.085995 0.085995 1.0 0.085995 \n", - "132552 9999 88239 0.085995 0.085995 2.0 0.171990 \n", - "132553 10000 33 0.085995 0.624736 4.0 1.127221 \n", - "132554 10000 34 0.054231 0.294240 4.0 0.642711 \n", - "132555 10000 39 8.170255 72.936852 144.0 3038.593534 \n", - "132556 10000 41 0.085995 0.208245 4.0 0.710731 \n", - "132557 10000 43 0.208245 0.208245 1.0 0.208245 \n", - "132558 10000 44 0.208245 0.208245 1.0 0.208245 \n", - "132559 10000 46 0.054231 0.085995 8.0 0.560904 \n", - "132560 10000 49 0.054231 0.085995 9.0 0.678663 \n", - "132561 10000 86 0.085995 0.085995 1.0 0.085995 \n", - "132562 10000 88239 0.085995 0.085995 2.0 0.171990 \n", - "\n", - " m2 m3 m4 m5 m6 \\\n", - "0 3.194477e-06 5.709522e-09 1.020469e-11 1.823894e-14 3.259864e-17 \n", - "1 3.415825e-03 8.928086e-05 2.333572e-06 6.099355e-08 1.594214e-09 \n", - "2 1.733675e+04 2.143301e+05 2.798359e+06 3.819016e+07 5.417785e+08 \n", - "3 6.844427e-03 1.785846e-04 4.667184e-06 1.219872e-07 3.188429e-09 \n", - "4 2.235946e-03 6.104237e-05 1.666485e-06 4.549582e-08 1.242057e-09 \n", - "5 8.492699e-07 7.826518e-10 7.212594e-13 6.646827e-16 6.125439e-19 \n", - "6 3.741749e-03 1.023592e-04 2.800137e-06 7.660049e-08 2.095482e-09 \n", - "7 1.747867e+04 2.170730e+05 2.847356e+06 3.903768e+07 5.562676e+08 \n", - "8 7.486895e-03 2.047216e-04 5.600277e-06 1.532010e-07 4.190964e-09 \n", - "9 2.235946e-03 6.104237e-05 1.666485e-06 4.549582e-08 1.242057e-09 \n", - "10 4.104998e-03 1.176209e-04 3.370201e-06 9.656667e-08 2.766933e-09 \n", - "11 1.763052e+04 2.200233e+05 2.900338e+06 3.995898e+07 5.720999e+08 \n", - "12 8.209995e-03 2.352417e-04 6.740402e-06 1.931333e-07 5.533867e-09 \n", - "13 2.235946e-03 6.104237e-05 1.666485e-06 4.549582e-08 1.242057e-09 \n", - "14 1.844681e-05 7.922861e-08 3.402850e-10 1.461516e-12 6.277173e-15 \n", - "15 2.555574e-03 5.777604e-05 1.306192e-06 2.953019e-08 6.676141e-10 \n", - "16 1.692965e+04 2.065406e+05 2.660609e+06 3.583104e+07 5.018357e+08 \n", - "17 5.184936e-03 1.158690e-04 2.613745e-06 5.906622e-08 1.335253e-09 \n", - "18 2.235946e-03 6.104237e-05 1.666485e-06 4.549582e-08 1.242057e-09 \n", - "19 2.421239e-05 1.191397e-07 5.862398e-10 2.884657e-12 1.419427e-14 \n", - "20 2.360369e-03 5.128436e-05 1.114269e-06 2.421001e-08 5.260172e-10 \n", - "21 1.401935e+04 1.553800e+05 1.816012e+06 2.215515e+07 2.805796e+08 \n", - "22 4.817588e-03 1.030453e-04 2.230882e-06 4.843155e-08 1.052091e-09 \n", - "23 1.678625e-03 3.970727e-05 9.392609e-07 2.221787e-08 5.255557e-10 \n", - "24 4.104998e-03 1.176209e-04 3.370201e-06 9.656667e-08 2.766933e-09 \n", - "25 1.763052e+04 2.200233e+05 2.900338e+06 3.995898e+07 5.720999e+08 \n", - "26 8.209995e-03 2.352417e-04 6.740402e-06 1.931333e-07 5.533867e-09 \n", - "27 2.235946e-03 6.104237e-05 1.666485e-06 4.549582e-08 1.242057e-09 \n", - "28 4.104998e-03 1.176209e-04 3.370201e-06 9.656667e-08 2.766933e-09 \n", - "29 1.763052e+04 2.200233e+05 2.900338e+06 3.995898e+07 5.720999e+08 \n", - "... ... ... ... ... ... \n", - "132533 7.395101e-03 6.359400e-04 5.468752e-05 4.702841e-06 4.044197e-07 \n", - "132534 1.479020e-02 1.271880e-03 1.093750e-04 9.405681e-06 8.088394e-07 \n", - "132535 7.395101e-03 6.359400e-04 5.468752e-05 4.702841e-06 4.044197e-07 \n", - "132536 1.479020e-02 1.271880e-03 1.093750e-04 9.405681e-06 8.088394e-07 \n", - "132537 2.642833e+05 1.320879e+07 7.187426e+08 4.198497e+10 2.598923e+12 \n", - "132538 7.395101e-03 6.359400e-04 5.468752e-05 4.702841e-06 4.044197e-07 \n", - "132539 2.958040e-02 2.543760e-03 2.187501e-04 1.881136e-05 1.617679e-06 \n", - "132540 4.437061e-02 3.815640e-03 3.281251e-04 2.821704e-05 2.426518e-06 \n", - "132541 7.395101e-03 6.359400e-04 5.468752e-05 4.702841e-06 4.044197e-07 \n", - "132542 1.479020e-02 1.271880e-03 1.093750e-04 9.405681e-06 8.088394e-07 \n", - "132543 9.407055e-02 2.091992e-02 5.207982e-03 1.354985e-03 3.580292e-04 \n", - "132544 4.603269e-02 6.676090e-03 1.049617e-03 1.732318e-04 2.938279e-05 \n", - "132545 1.215812e+05 4.409394e+06 1.793299e+08 7.978308e+09 3.801183e+11 \n", - "132546 3.103386e-02 2.734283e-03 2.409512e-04 2.123691e-05 1.872100e-06 \n", - "132547 7.879586e-03 6.994475e-04 6.208788e-05 5.511357e-06 4.892268e-07 \n", - "132548 7.879586e-03 6.994475e-04 6.208788e-05 5.511357e-06 4.892268e-07 \n", - "132549 3.044565e-02 2.556486e-03 2.189372e-04 1.881412e-05 1.617719e-06 \n", - "132550 4.501954e-02 3.825184e-03 3.282655e-04 2.821911e-05 2.426549e-06 \n", - "132551 7.395101e-03 6.359400e-04 5.468752e-05 4.702841e-06 4.044197e-07 \n", - "132552 1.479020e-02 1.271880e-03 1.093750e-04 9.405681e-06 8.088394e-07 \n", - "132553 4.844223e-01 2.625288e-01 1.561461e-01 9.595410e-02 5.961722e-02 \n", - "132554 1.402795e-01 3.530072e-02 9.439573e-03 2.602313e-03 7.309347e-04 \n", - "132555 7.865887e+04 2.549735e+06 1.037753e+08 5.133570e+09 2.919976e+11 \n", - "132556 1.374934e-01 2.772831e-02 5.696546e-03 1.179593e-03 2.450699e-04 \n", - "132557 4.336611e-02 9.030789e-03 1.880620e-03 3.916302e-04 8.155516e-05 \n", - "132558 4.336611e-02 9.030789e-03 1.880620e-03 3.916302e-04 8.155516e-05 \n", - "132559 4.134455e-02 3.181745e-03 2.533488e-04 2.068770e-05 1.719435e-06 \n", - "132560 5.319371e-02 4.294129e-03 3.540742e-04 2.962430e-05 2.502835e-06 \n", - "132561 7.395101e-03 6.359400e-04 5.468752e-05 4.702841e-06 4.044197e-07 \n", - "132562 1.479020e-02 1.271880e-03 1.093750e-04 9.405681e-06 8.088394e-07 \n", - "\n", - " m7 m8 \n", - "0 5.826388e-20 1.041356e-22 \n", - "1 4.166866e-11 1.089111e-12 \n", - "2 7.966347e+09 1.211974e+11 \n", - "3 8.333732e-11 2.178223e-12 \n", - "4 3.390873e-11 9.257241e-13 \n", - "5 5.644950e-22 5.202151e-25 \n", - "6 5.732396e-11 1.568153e-12 \n", - "7 8.213929e+09 1.254549e+11 \n", - "8 1.146479e-10 3.136307e-12 \n", - "9 3.390873e-11 9.257241e-13 \n", - "10 7.928119e-11 2.271651e-12 \n", - "11 8.485816e+09 1.301526e+11 \n", - "12 1.585624e-10 4.543302e-12 \n", - "13 3.390873e-11 9.257241e-13 \n", - "14 2.696031e-17 1.157939e-19 \n", - "15 1.509332e-11 3.412274e-13 \n", - "16 7.290160e+09 1.096720e+11 \n", - "17 3.018674e-11 6.824554e-13 \n", - "18 3.390873e-11 9.257241e-13 \n", - "19 6.984444e-17 3.436772e-19 \n", - "20 1.142891e-11 2.483190e-13 \n", - "21 3.677848e+09 4.980961e+10 \n", - "22 2.285810e-11 4.966393e-13 \n", - "23 1.243183e-11 2.940704e-13 \n", - "24 7.928119e-11 2.271651e-12 \n", - "25 8.485816e+09 1.301526e+11 \n", - "26 1.585624e-10 4.543302e-12 \n", - "27 3.390873e-11 9.257241e-13 \n", - "28 7.928119e-11 2.271651e-12 \n", - "29 8.485816e+09 1.301526e+11 \n", - "... ... ... \n", - "132533 3.477798e-08 2.990725e-09 \n", - "132534 6.955596e-08 5.981449e-09 \n", - "132535 3.477798e-08 2.990725e-09 \n", - "132536 6.955596e-08 5.981449e-09 \n", - "132537 1.685744e+14 1.135115e+16 \n", - "132538 3.477798e-08 2.990725e-09 \n", - "132539 1.391119e-07 1.196290e-08 \n", - "132540 2.086679e-07 1.794435e-08 \n", - "132541 3.477798e-08 2.990725e-09 \n", - "132542 6.955596e-08 5.981449e-09 \n", - "132543 9.509695e-05 2.530273e-05 \n", - "132544 5.057017e-06 8.769517e-07 \n", - "132545 1.907161e+13 9.951139e+14 \n", - "132546 1.650596e-07 1.455544e-08 \n", - "132547 4.342721e-08 3.854905e-09 \n", - "132548 4.342721e-08 3.854905e-09 \n", - "132549 1.391125e-07 1.196291e-08 \n", - "132550 2.086683e-07 1.794435e-08 \n", - "132551 3.477798e-08 2.990725e-09 \n", - "132552 6.955596e-08 5.981449e-09 \n", - "132553 3.717687e-02 2.321156e-02 \n", - "132554 2.079667e-04 5.972406e-05 \n", - "132555 1.813755e+13 1.187581e+15 \n", - "132556 5.098521e-05 1.061318e-05 \n", - "132557 1.698348e-05 3.536730e-06 \n", - "132558 1.698348e-05 3.536730e-06 \n", - "132559 1.446303e-07 1.226217e-08 \n", - "132560 2.128067e-07 1.816880e-08 \n", - "132561 3.477798e-08 2.990725e-09 \n", - "132562 6.955596e-08 5.981449e-09 \n", - "\n", - "[132563 rows x 13 columns]" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def moment(n):\n", - " def moment_(x):\n", - " return np.sum(np.power(x, n))\n", - " moment_.__name__ = 'm%s' % n\n", - " return moment_\n", - "\n", - "cube = data.groupby([\"Grid\", \"Country\"]).agg({metric: [\n", - " 'min',\n", - " 'max',\n", - " moment(0),\n", - " moment(1),\n", - " moment(2),\n", - " moment(3),\n", - " moment(4),\n", - " moment(5),\n", - " moment(6),\n", - " moment(7),\n", - " moment(8)\n", - "]}).reset_index(col_level=1)\n", - "cube.columns = cube.columns.get_level_values(1)\n", - "cube" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "cube.to_csv('lib/src/test/resources/milan_moments_cubed.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
GridCountrycountoutliers1outliers5outliers10
013210.00.00.0
113350.00.00.0
21391440.00.00.0
3146140.00.00.0
414930.00.00.0
523210.00.00.0
623350.00.00.0
72391440.00.00.0
8246140.00.00.0
924930.00.00.0
1033350.00.00.0
113391440.00.00.0
12346100.00.00.0
1334930.00.00.0
1443210.00.00.0
1543350.00.00.0
164391440.00.00.0
17446140.00.00.0
1844930.00.00.0
1953210.00.00.0
2053350.00.00.0
215391440.00.00.0
22546140.00.00.0
2354930.00.00.0
24103350.00.00.0
2510391440.00.00.0
261046100.00.00.0
27104930.00.00.0
28113350.00.00.0
2911391440.00.00.0
.....................
53293502749150.00.00.0
53294502790100.00.00.0
53295502735310.00.00.0
53296502737010.00.00.0
5329750278823910.00.00.0
532985028120.00.00.0
5329950283310.00.00.0
533005028391440.00.06.0
5330150284130.00.00.0
5330250284420.00.00.0
53303502846480.00.00.0
5330450284910.00.00.0
53305502835220.00.00.0
53306502835310.00.00.0
5330750293320.00.00.0
53308502939330.00.01.0
53309502946120.00.00.0
5331050294910.00.00.0
53311502935220.00.00.0
53312502935310.00.00.0
53313100003340.00.00.0
53314100003440.00.00.0
5331510000391440.00.01.0
53316100004140.00.00.0
53317100004310.00.00.0
53318100004410.00.00.0
53319100004680.00.00.0
53320100004990.00.00.0
53321100008610.00.00.0
53322100008823920.00.00.0
\n", - "

53323 rows × 6 columns

\n", - "
" - ], - "text/plain": [ - " Grid Country count outliers1 outliers5 outliers10\n", - "0 1 32 1 0.0 0.0 0.0\n", - "1 1 33 5 0.0 0.0 0.0\n", - "2 1 39 144 0.0 0.0 0.0\n", - "3 1 46 14 0.0 0.0 0.0\n", - "4 1 49 3 0.0 0.0 0.0\n", - "5 2 32 1 0.0 0.0 0.0\n", - "6 2 33 5 0.0 0.0 0.0\n", - "7 2 39 144 0.0 0.0 0.0\n", - "8 2 46 14 0.0 0.0 0.0\n", - "9 2 49 3 0.0 0.0 0.0\n", - "10 3 33 5 0.0 0.0 0.0\n", - "11 3 39 144 0.0 0.0 0.0\n", - "12 3 46 10 0.0 0.0 0.0\n", - "13 3 49 3 0.0 0.0 0.0\n", - "14 4 32 1 0.0 0.0 0.0\n", - "15 4 33 5 0.0 0.0 0.0\n", - "16 4 39 144 0.0 0.0 0.0\n", - "17 4 46 14 0.0 0.0 0.0\n", - "18 4 49 3 0.0 0.0 0.0\n", - "19 5 32 1 0.0 0.0 0.0\n", - "20 5 33 5 0.0 0.0 0.0\n", - "21 5 39 144 0.0 0.0 0.0\n", - "22 5 46 14 0.0 0.0 0.0\n", - "23 5 49 3 0.0 0.0 0.0\n", - "24 10 33 5 0.0 0.0 0.0\n", - "25 10 39 144 0.0 0.0 0.0\n", - "26 10 46 10 0.0 0.0 0.0\n", - "27 10 49 3 0.0 0.0 0.0\n", - "28 11 33 5 0.0 0.0 0.0\n", - "29 11 39 144 0.0 0.0 0.0\n", - "... ... ... ... ... ... ...\n", - "53293 5027 49 15 0.0 0.0 0.0\n", - "53294 5027 90 10 0.0 0.0 0.0\n", - "53295 5027 353 1 0.0 0.0 0.0\n", - "53296 5027 370 1 0.0 0.0 0.0\n", - "53297 5027 88239 1 0.0 0.0 0.0\n", - "53298 5028 1 2 0.0 0.0 0.0\n", - "53299 5028 33 1 0.0 0.0 0.0\n", - "53300 5028 39 144 0.0 0.0 6.0\n", - "53301 5028 41 3 0.0 0.0 0.0\n", - "53302 5028 44 2 0.0 0.0 0.0\n", - "53303 5028 46 48 0.0 0.0 0.0\n", - "53304 5028 49 1 0.0 0.0 0.0\n", - "53305 5028 352 2 0.0 0.0 0.0\n", - "53306 5028 353 1 0.0 0.0 0.0\n", - "53307 5029 33 2 0.0 0.0 0.0\n", - "53308 5029 39 33 0.0 0.0 1.0\n", - "53309 5029 46 12 0.0 0.0 0.0\n", - "53310 5029 49 1 0.0 0.0 0.0\n", - "53311 5029 352 2 0.0 0.0 0.0\n", - "53312 5029 353 1 0.0 0.0 0.0\n", - "53313 10000 33 4 0.0 0.0 0.0\n", - "53314 10000 34 4 0.0 0.0 0.0\n", - "53315 10000 39 144 0.0 0.0 1.0\n", - "53316 10000 41 4 0.0 0.0 0.0\n", - "53317 10000 43 1 0.0 0.0 0.0\n", - "53318 10000 44 1 0.0 0.0 0.0\n", - "53319 10000 46 8 0.0 0.0 0.0\n", - "53320 10000 49 9 0.0 0.0 0.0\n", - "53321 10000 86 1 0.0 0.0 0.0\n", - "53322 10000 88239 2 0.0 0.0 0.0\n", - "\n", - "[53323 rows x 6 columns]" - ] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def outliers(t, name):\n", - " def outliers_(x):\n", - " return np.sum(x >= t)\n", - " outliers_.__name__ = 'outliers%s' % name\n", - " return outliers_\n", - "\n", - "t1 = data[metric].quantile(0.99)\n", - "t5 = data[metric].quantile(0.95)\n", - "t10 = data[metric].quantile(0.90)\n", - "oracle = data.groupby([\"Grid\", \"Country\"]).agg({metric: [\n", - " 'count',\n", - " outliers(t1, \"1\"),\n", - " outliers(t5, \"5\"),\n", - " outliers(t10, \"10\")\n", - "]}).reset_index(col_level=1)\n", - "oracle.columns = oracle.columns.get_level_values(1)\n", - "oracle" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "oracle.to_csv('lib/src/test/resources/milan_oracle_cubed.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2.33149769953e-06\n", - "3.57891732062e-06\n" - ] - } - ], - "source": [ - "print(cube[\"min\"].min())\n", - "print(data[metric].min())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Moments Cube Creation.ipynb b/Moments Cube Creation.ipynb new file mode 100644 index 000000000..f48cfc2ec --- /dev/null +++ b/Moments Cube Creation.ipynb @@ -0,0 +1,4234 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "milan_data = pd.read_csv('~/Downloads/sms-call-internet-mi-2013-11-01 2.txt', sep='\\t',\n", + " header=None, names = [\"Grid\", \"Time\", \"Country\", \"SMSin\", \"SMSout\",\n", + " \"Callin\", \"Callout\", \"Internet\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAD9CAYAAABA8iukAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEyFJREFUeJzt3WGMpdV93/Hvz1AcksYGEndKd1FByjYV9qqtPQIqV9U0\npLBA1CWVbWGhsLjUW8ngptVKzbp9QWViiVRJqakc1I3ZGqI0hNJErMI62w32KOoLMDi2vF6IzQiv\nw46wSbwYd2vF1pp/X9yz68t6Zufs3Jm9d2a+H+lqn+c857n33KO785tznnOfSVUhSVKPN427AZKk\ntcPQkCR1MzQkSd0MDUlSN0NDktTN0JAkdVsyNJLsTfJKki8PlV2S5GCSF9q/F7fyJLk/yVySLyV5\n59A5O1r9F5LsGCp/V5JD7Zz7k+RMryFJGp+ekcangG2nle0GnqyqLcCTbR/gBmBLe+wEHoBBAAB3\nA1cDVwF3D4XAA8AHh87btsRrSJLGZMnQqKo/AY6dVrwdeKhtPwTcPFT+cA08BVyU5FLgeuBgVR2r\nqleBg8C2duwtVfVUDb5l+PBpz7XQa0iSxmS51zSmqurltv0NYKptbwJeGqp3tJWdqfzoAuVneg1J\n0picP+oTVFUlWdV7kSz1Gkl2MpgO48ILL3zXZZddtprNGbvXX3+dN73JNQwLsW8WZ98szr6Br371\nq39ZVW9bqt5yQ+ObSS6tqpfbFNMrrXweGP6JvbmVzQMzp5XPtvLNC9Q/02v8iKraA+wBmJ6ermef\nfXaZb2ttmJ2dZWZmZtzNmEj2zeLsm8XZN5Dk6z31lhut+4CTK6B2AI8Pld/WVlFdA7zWppgOANcl\nubhdAL8OONCOfSfJNW3V1G2nPddCryFJGpMlRxpJfpfBKOGnkxxlsArqXuDRJHcAXwfe16rvB24E\n5oDvAh8AqKpjSe4Bnmn1PlpVJy+uf4jBCq0LgU+3B2d4DUnSmCwZGlX1/kUOXbtA3QLuXOR59gJ7\nFyh/FnjHAuXfWug1JEnjs7Gv/EiSzoqhIUnqZmhIkroZGpKkboaGJKmboSFJ6jbybUQk6Vy6fPcT\np7aP3HvTGFuyMRkakibecFBovJyekiR1MzQkSd0MDUlSN0NDktTN0JAkdTM0JEndXHIraSK5zHYy\nOdKQJHUzNCRJ3ZyekjQxznZKyluKnHuONCRJ3QwNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN0NDktTN\n0JAkdTM0JEnd/Ea4pLHyxoRriyMNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN0NDktTNJbeS1oXTl+76\nR5lWx0gjjST/NsnhJF9O8rtJfizJFUmeTjKX5PeSXNDqvrntz7Xjlw89z0da+VeSXD9Uvq2VzSXZ\nPUpbJUmjW3ZoJNkE/GtguqreAZwH3AL8GnBfVf0M8CpwRzvlDuDVVn5fq0eSK9t5bwe2Ab+Z5Lwk\n5wGfAG4ArgTe3+pKksZk1Gsa5wMXJjkf+HHgZeDngMfa8YeAm9v29rZPO35tkrTyR6rqe1X1NWAO\nuKo95qrqxar6PvBIqytJGpNlh0ZVzQO/Dvw5g7B4Dfg88O2qOtGqHQU2te1NwEvt3BOt/k8Nl592\nzmLlkqQxWfaF8CQXM/jN/wrg28D/ZDC9dM4l2QnsBJiammJ2dnYczThnjh8/vu7f43LZN4ub1L7Z\ntfXE0pWW4Wze66T2zSQaZfXUzwNfq6q/AEjy+8C7gYuSnN9GE5uB+VZ/HrgMONqms94KfGuo/KTh\ncxYrf4Oq2gPsAZienq6ZmZkR3tbkm52dZb2/x+WybxY3qX1z+yrdsPDIrTPddSe1bybRKNc0/hy4\nJsmPt2sT1wLPAZ8F3tPq7AAeb9v72j7t+Geqqlr5LW111RXAFuBzwDPAlrYa6wIGF8v3jdBeSdKI\nlj3SqKqnkzwG/ClwAvgCg9/2nwAeSfKrrezBdsqDwG8nmQOOMQgBqupwkkcZBM4J4M6q+gFAkruA\nAwxWZu2tqsPLba8kaXQjfbmvqu4G7j6t+EUGK59Or/tXwHsXeZ6PAR9boHw/sH+UNkqaPP4NjbXL\n24hIkroZGpKkboaGJKmboSFJ6mZoSJK6GRqSpG6GhiSpm6EhSepmaEiSuhkakqRuhoYkqZuhIUnq\nZmhIkrqNdJdbSerlnW3XB0cakqRuhoYkqZuhIUnqZmhIkrp5IVzSujR84f3IvTeNsSXriyMNSVI3\nQ0OS1M3QkCR1MzQkSd0MDUlSN0NDktTN0JAkdTM0JEnd/HKfpFXjnW3XH0cakqRuhoYkqZuhIUnq\nZmhIkroZGpKkboaGJKnbSKGR5KIkjyX5syTPJ/mHSS5JcjDJC+3fi1vdJLk/yVySLyV559Dz7Gj1\nX0iyY6j8XUkOtXPuT5JR2itJGs2oI42PA39UVX8X+HvA88Bu4Mmq2gI82fYBbgC2tMdO4AGAJJcA\ndwNXA1cBd58Mmlbng0PnbRuxvdLYXL77iVMPaa1admgkeSvwj4EHAarq+1X1bWA78FCr9hBwc9ve\nDjxcA08BFyW5FLgeOFhVx6rqVeAgsK0de0tVPVVVBTw89FySpDEYZaRxBfAXwH9P8oUkn0zyE8BU\nVb3c6nwDmGrbm4CXhs4/2srOVH50gXJJ0piMchuR84F3Ah+uqqeTfJwfTkUBUFWVpEZpYI8kOxlM\neTE1NcXs7Oxqv+RYHT9+fN2/x+WahL45NP/aqe2tm956anvX1hOntsfRxnH0zfB7Hqel3vckfG7W\nilFC4yhwtKqebvuPMQiNbya5tKpeblNMr7Tj88BlQ+dvbmXzwMxp5bOtfPMC9X9EVe0B9gBMT0/X\nzMzMQtXWjdnZWdb7e1yuSeib24euWRy5dWbJ8nNlHH1z+4Rcv1mqvyfhc7NWLHt6qqq+AbyU5Gdb\n0bXAc8A+4OQKqB3A4217H3BbW0V1DfBam8Y6AFyX5OJ2Afw64EA79p0k17RVU7cNPZckaQxGvcvt\nh4HfSXIB8CLwAQZB9GiSO4CvA+9rdfcDNwJzwHdbXarqWJJ7gGdavY9W1bG2/SHgU8CFwKfbQ5I0\nJiOFRlV9EZhe4NC1C9Qt4M5FnmcvsHeB8meBd4zSRknSyvEb4ZKkboaGJKmboSFJ6uafe5W0orxN\nyvpmaEha94aD7Mi9N42xJWuf01OSpG6GhiSpm9NT0gpwHl8bhSMNSVI3Q0OS1M3QkCR1MzQkSd0M\nDUlSN0NDktTN0JAkdTM0JEnd/HKftIr80p/WG0cakqRujjQkjcwR1cbhSEOS1M3QkCR1MzQkSd0M\nDUlSNy+ES8vkxV9tRIaGdBYMCm10hoakDWU4+I/ce9MYW7I2eU1DktTN0JAkdTM0JEndDA1JUjdD\nQ5LUzdVTkpbF5ccbkyMNSVI3RxrSGPhdAa1VjjQkSd1GDo0k5yX5QpI/bPtXJHk6yVyS30tyQSt/\nc9ufa8cvH3qOj7TyryS5fqh8WyubS7J71LZKkkazEiONXwaeH9r/NeC+qvoZ4FXgjlZ+B/BqK7+v\n1SPJlcAtwNuBbcBvtiA6D/gEcANwJfD+VldaVy7f/cSphzTpRgqNJJuBm4BPtv0APwc81qo8BNzc\ntre3fdrxa1v97cAjVfW9qvoaMAdc1R5zVfViVX0feKTVldYtA0STbtSRxn8B/h3wetv/KeDbVXWi\n7R8FNrXtTcBLAO34a63+qfLTzlmsXJI0JstePZXkF4BXqurzSWZWrknLastOYCfA1NQUs7Oz42zO\nqjt+/Pi6f4/Ltdp9s2vriaUrrZCVfh8r3Tfnsi9Wy8n+8P9Uv1GW3L4b+GdJbgR+DHgL8HHgoiTn\nt9HEZmC+1Z8HLgOOJjkfeCvwraHyk4bPWaz8DapqD7AHYHp6umZmZkZ4W5NvdnaW9f4el2u1++b2\nczhtdOTWmRV9vpXum3PZF6vm0P8DYNfWH/DhX5gZb1vWiGVPT1XVR6pqc1VdzuBC9meq6lbgs8B7\nWrUdwONte1/bpx3/TFVVK7+lra66AtgCfA54BtjSVmNd0F5j33LbK0ka3Wp8ue9XgEeS/CrwBeDB\nVv4g8NtJ5oBjDEKAqjqc5FHgOeAEcGdV/QAgyV3AAeA8YG9VHV6F9kqSOq1IaFTVLDDbtl9ksPLp\n9Dp/Bbx3kfM/BnxsgfL9wP6VaKMkaXR+I1yS1M3QkCR1MzQkSd0MDUlSN2+NLqmbtzeRIw1JUjdD\nQ5LUzdCQJHUzNCRJ3bwQLjX+3W5paY40JEndHGloQ3MJqXR2DA1JwunJXk5PSZK6GRqSpG5OT0k6\nI6/7aJihIU0o59g1iQwNaQH+wJYW5jUNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN1dPSdJpXD23OEca\nkqRujjSkJfiNaOmHHGlIkro50pD0IxxdaTGONCRJ3QwNSVI3Q0OS1M1rGtIa4PcGNCkcaUiSuhka\nkqRuyw6NJJcl+WyS55IcTvLLrfySJAeTvND+vbiVJ8n9SeaSfCnJO4eea0er/0KSHUPl70pyqJ1z\nf5KM8mal9eDy3U+84SGdS6Nc0zgB7KqqP03yk8DnkxwEbgeerKp7k+wGdgO/AtwAbGmPq4EHgKuT\nXALcDUwD1Z5nX1W92up8EHga2A9sAz49Qpu1Qa3nawLr+b1p8iw7NKrqZeDltv1/kzwPbAK2AzOt\n2kPALIPQ2A48XFUFPJXkoiSXtroHq+oYQAuebUlmgbdU1VOt/GHgZgwNjcjfznU2DOU3WpFrGkku\nB/4BgxHBVAsUgG8AU217E/DS0GlHW9mZyo8uUC5JGpORl9wm+evA/wL+TVV9Z/iyQ1VVkhr1NTra\nsBPYCTA1NcXs7Oxqv+RYHT9+fN2/x+VarG92bT1x7hszBmf6XCz1uTk0/9qp7V1bV7BRa8DUhX2f\nEf/fjRgaSf4ag8D4nar6/Vb8zSSXVtXLbfrplVY+D1w2dPrmVjbPD6ezTpbPtvLNC9T/EVW1B9gD\nMD09XTMzMwtVWzdmZ2dZ7+9xuRbrm9s3yJTUkVtnFj221Odmo/TRQnZtPcFvHFr6x+GZ+nejGGX1\nVIAHgeer6j8PHdoHnFwBtQN4fKj8traK6hrgtTaNdQC4LsnFbaXVdcCBduw7Sa5pr3Xb0HNJksZg\nlJHGu4FfAg4l+WIr+/fAvcCjSe4Avg68rx3bD9wIzAHfBT4AUFXHktwDPNPqffTkRXHgQ8CngAsZ\nXAD3Irh0Bl601WobZfXU/wEW+97EtQvUL+DORZ5rL7B3gfJngXcst43a2FwlJa08vxEuSepmaEiS\nuhkakqRu3hpd2sC87qOzZWhI69TpgbBr6wlu3/2Eq6o0EkNDkjq5pNnQkDYcp6Q0CkND68qh+dc2\n9O0wpNXm6ilJUjdDQ5LUzdCQJHXzmobWvOELuxvt70BI55ojDUlSN0NDktTN0JAkdTM0JEndDA1J\nUjdXT2lN8lYY0ngYGpK0DBv15oWGhtYMRxfS+BkamliGhDR5DA1NFINCmmyunpIkdTM0JEndnJ7S\nWDgNJa1NjjQkSd0MDUlSN6enJGlEG+mLfoaGzhmvY0hrn6GhVWVQSOuLoaEVZ1BI65ehoWXbSPO4\nkgYMDUlaQev9lylDQ2dlsaknp6SkjcHQ0IIMAUkLmfjQSLIN+DhwHvDJqrp3zE1atwwKaWWtx6mq\niQ6NJOcBnwD+KXAUeCbJvqp6brwtWz8MCklnY6JDA7gKmKuqFwGSPAJsBzZkaJz8Ab9r6wlmFiiX\nNLnWy6hj0kNjE/DS0P5R4OoxtWVFnekH/fAHygvP0vqzlgMkVTXuNiwqyXuAbVX1L9v+LwFXV9Vd\np9XbCexsuz8LfOWcNvTc+2ngL8fdiAll3yzOvlmcfQN/u6retlSlSR9pzAOXDe1vbmVvUFV7gD3n\nqlHjluTZqpoedzsmkX2zOPtmcfZNv0m/NfozwJYkVyS5ALgF2DfmNknShjXRI42qOpHkLuAAgyW3\ne6vq8JibJUkb1kSHBkBV7Qf2j7sdE2bDTMUtg32zOPtmcfZNp4m+EC5JmiyTfk1DkjRBDI01JMl7\nkxxO8nqS6dOOfSTJXJKvJLl+XG2cBEn+Y5L5JF9sjxvH3aZxSrKtfS7mkuwed3smSZIjSQ61z8mz\n427PWjDx1zT0Bl8G/jnw34YLk1zJYGXZ24G/Bfxxkr9TVT84902cGPdV1a+PuxHj5q14uvyTqtro\n39Ho5khjDamq56tqoS8ubgceqarvVdXXgDkGt2CRTt2Kp6q+D5y8FY+0LIbG+rDQ7VY2jaktk+Ku\nJF9KsjfJxeNuzBj52TizAv53ks+3O0toCU5PTZgkfwz8zQUO/Yeqevxct2dSnamfgAeAexj8QLgH\n+A3gX5y71mkN+UdVNZ/kbwAHk/xZVf3JuBs1yQyNCVNVP7+M07put7Ke9PZTkt8C/nCVmzPJNtxn\n42xU1Xz795Ukf8BgOs/QOAOnp9aHfcAtSd6c5ApgC/C5MbdpbJJcOrT7iwwWEGxU3opnEUl+IslP\nntwGrmNjf1a6ONJYQ5L8IvBfgbcBTyT5YlVdX1WHkzzK4O+MnADu3OArp/5Tkr/PYHrqCPCvxtuc\n8fFWPGc0BfxBEhj8LPwfVfVH423S5PMb4ZKkbk5PSZK6GRqSpG6GhiSpm6EhSepmaEiSuhkakqRu\nhoYkqZuhIUnq9v8BYuvezWR1bPsAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "np.log(milan_data[\"Internet\"]).hist(bins=100)" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [], + "source": [ + "metric = \"Internet\"\n", + "milan_data = milan_data[[\"Grid\", \"Country\", metric]]\n", + "milan_data = milan_data[np.isfinite(milan_data[metric])]\n", + "milan_data[metric] = np.log(milan_data[metric])\n", + "# data = data.head(1000000)" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GridCountryminmaxm0m1m2m3m4m5m6m7m8
0132-6.327044-6.3270441.0-6.32704440.031480-253.2809151602.519371-10139.20980864151.221753-4.058876e+052.568068e+06
1133-3.644387-3.6443875.0-18.22193666.407787-242.015683881.998838-3214.34519611714.318207-4.269151e+041.555844e+05
21391.5374672.987433144.0330.013292773.5542321848.8587984492.68869211070.83949227604.8278656.952730e+041.766393e+05
3146-6.327044-3.64438714.0-61.752045292.941493-1497.1550268174.075159-46985.529626280033.523428-1.708933e+061.058344e+07
4149-3.600852-3.6008523.0-10.80255538.898396-140.067348504.361727-1816.1317076539.620669-2.354820e+048.479358e+04
5232-6.989444-6.9894441.0-6.98944448.852333-341.4506632386.550423-16680.661453116588.555547-8.148892e+055.695623e+06
6233-3.598820-3.5988205.0-17.99410064.757529-233.050692838.707500-3018.35736410862.524979-3.909227e+041.406861e+05
72391.5377712.990041144.0330.521887775.9847831857.7189214521.80606011161.60652027879.1220747.033982e+041.790136e+05
8246-6.989444-3.59882014.0-63.945978324.924388-1831.90403711223.616691-72759.360538488079.272146-3.337741e+062.306386e+07
9249-3.600852-3.6008523.0-10.80255538.898396-140.067348504.361727-1816.1317076539.620669-2.354820e+048.479358e+04
10333-3.552494-3.5524945.0-17.76247063.101069-224.166170796.348978-2829.02498210050.094333-3.570290e+041.268343e+05
113391.5380942.992809144.0331.060698778.5645771867.1411614552.83122011258.51179428172.5489377.121076e+041.815637e+05
12346-3.552494-3.55249410.0-35.524940126.202138-448.3323401592.697956-5658.04996420100.188667-7.140580e+042.536687e+05
13349-3.600852-3.6008523.0-10.80255538.898396-140.067348504.361727-1816.1317076539.620669-2.354820e+048.479358e+04
14432-5.450309-5.4503091.0-5.45030929.705873-161.906200882.438887-4809.56499426213.617470-1.428723e+057.786984e+05
15433-3.789458-3.7894585.0-18.94729171.799965-272.0829601031.046984-3907.10937714805.827395-5.610606e+042.126116e+05
164391.5365872.979840144.0328.526707766.4754811823.1491704408.51271110809.42655026817.8402826.720492e+041.698787e+05
17446-5.450309-3.78945814.0-59.695819262.423421-1191.7907185591.849518-27052.478731134466.124669-6.837014e+053.540017e+06
18449-3.600852-3.6008523.0-10.80255538.898396-140.067348504.361727-1816.1317076539.620669-2.354820e+048.479358e+04
19532-5.314323-5.3143231.0-5.31432328.242029-150.087267797.612217-4238.76897422526.187576-1.197114e+056.361852e+05
20533-3.829188-3.8291885.0-19.14593873.313386-280.7307031074.970507-4116.26366115761.945491-6.035545e+042.311123e+05
215391.4468712.867586144.0315.103022706.3175271616.2021733762.9464958889.25808421256.2531215.135412e+041.251688e+05
22546-5.314323-3.82918814.0-59.549167259.594889-1161.8104725340.389882-25187.603218121628.641284-5.995566e+053.006966e+06
23549-3.744196-3.7441963.0-11.23258942.057016-157.469718589.597519-2207.5687928265.570682-3.094792e+041.158751e+05
24633-3.552494-3.5524945.0-17.76247063.101069-224.166170796.348978-2829.02498210050.094333-3.570290e+041.268343e+05
256391.5380942.992809144.0331.060698778.5645771867.1411614552.83122011258.51179428172.5489377.121076e+041.815637e+05
26646-3.552494-3.55249410.0-35.524940126.202138-448.3323401592.697956-5658.04996420100.188667-7.140580e+042.536687e+05
27649-3.600852-3.6008523.0-10.80255538.898396-140.067348504.361727-1816.1317076539.620669-2.354820e+048.479358e+04
28733-3.552494-3.5524945.0-17.76247063.101069-224.166170796.348978-2829.02498210050.094333-3.570290e+041.268343e+05
297391.5380942.992809144.0331.060698778.5645771867.1411614552.83122011258.51179428172.5489377.121076e+041.815637e+05
..........................................
132533999786-2.453469-2.4534691.0-2.4534696.019509-14.76867736.234488-88.900185218.113828-5.351355e+021.312938e+03
132534999788239-2.453469-2.4534692.0-4.90693812.039018-29.53735572.468977-177.800371436.227656-1.070271e+032.625876e+03
132535999833-2.453469-2.4534691.0-2.4534696.019509-14.76867736.234488-88.900185218.113828-5.351355e+021.312938e+03
132536999834-2.453469-2.4534692.0-4.90693812.039018-29.53735572.468977-177.800371436.227656-1.070271e+032.625876e+03
1325379998392.9455794.403509144.0524.7800661929.3742097154.11544926746.073877100782.539685382634.3992171.463212e+065.633865e+06
132538999841-2.453469-2.4534691.0-2.4534696.019509-14.76867736.234488-88.900185218.113828-5.351355e+021.312938e+03
132539999846-2.453469-2.4534694.0-9.81387524.078036-59.074709144.937953-355.600742872.455313-2.140542e+035.251753e+03
132540999849-2.453469-2.4534696.0-14.72081336.117054-88.612064217.406930-533.4011131308.682969-3.210813e+037.877629e+03
132541999886-2.453469-2.4534691.0-2.4534696.019509-14.76867736.234488-88.900185218.113828-5.351355e+021.312938e+03
132542999888239-2.453469-2.4534692.0-4.90693812.039018-29.53735572.468977-177.800371436.227656-1.070271e+032.625876e+03
132543999933-2.453469-1.3231284.0-8.62007619.499824-45.491191108.091651-259.552480626.934249-1.519298e+033.688524e+03
132544999934-4.219398-1.7443314.0-10.83893832.730342-109.398491396.846705-1525.7198956090.915444-2.488246e+041.030442e+05
1325459999392.3365244.146807144.0461.0384861501.3484304968.00755816687.96075256846.694098196182.2147336.852638e+052.420556e+06
132546999941-2.453469-2.4217404.0-9.71868923.613982-57.377915139.422981-338.795878823.296205-2.000730e+034.862226e+03
132547999943-2.421740-2.4217401.0-2.4217405.864824-14.20307934.396164-83.298564201.727459-4.885314e+021.183096e+03
132548999944-2.421740-2.4217401.0-2.4217405.864824-14.20307934.396164-83.298564201.727459-4.885314e+021.183096e+03
132549999946-4.219398-2.4534698.0-26.69146695.291302-359.5517961412.770269-5705.08943023444.075302-9.737918e+044.071014e+05
132550999949-4.219398-2.4534699.0-27.37900689.527003-313.9698791168.281167-4545.51762918237.397961-7.463979e+043.092649e+05
132551999986-2.453469-2.4534691.0-2.4534696.019509-14.76867736.234488-88.900185218.113828-5.351355e+021.312938e+03
132552999988239-2.453469-2.4534692.0-4.90693812.039018-29.53735572.468977-177.800371436.227656-1.070271e+032.625876e+03
1325531000033-2.453469-0.4704264.0-6.06197211.164573-22.59835848.405187-107.942676247.966919-5.819642e+021.386409e+03
1325541000034-2.914497-1.2233594.0-8.16036318.472290-45.218936116.688164-311.439652849.275503-2.348909e+036.560741e+03
13255510000392.1005004.289594144.0425.4271731283.2363483949.88945612400.18335939683.914847129402.7936314.297711e+051.453222e+06
1325561000041-2.453469-1.5690394.0-7.16058413.405154-26.35703954.417075-117.429363262.877207-6.053709e+021.423140e+03
1325571000043-1.569039-1.5690391.0-1.5690392.461882-3.8627876.060862-9.50972614.921126-2.341182e+013.673405e+01
1325581000044-1.569039-1.5690391.0-1.5690392.461882-3.8627876.060862-9.50972614.921126-2.341182e+013.673405e+01
1325591000046-2.914497-2.4534698.0-21.47186258.055200-158.101040433.549868-1196.7592123324.008890-9.285587e+032.607596e+04
1325601000049-2.914497-2.4534699.0-23.46430361.599927-162.881812433.865866-1164.2699663147.348152-8.569596e+032.349579e+04
1325611000086-2.453469-2.4534691.0-2.4534696.019509-14.76867736.234488-88.900185218.113828-5.351355e+021.312938e+03
1325621000088239-2.453469-2.4534692.0-4.90693812.039018-29.53735572.468977-177.800371436.227656-1.070271e+032.625876e+03
\n", + "

132563 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " Grid Country min max m0 m1 m2 \\\n", + "0 1 32 -6.327044 -6.327044 1.0 -6.327044 40.031480 \n", + "1 1 33 -3.644387 -3.644387 5.0 -18.221936 66.407787 \n", + "2 1 39 1.537467 2.987433 144.0 330.013292 773.554232 \n", + "3 1 46 -6.327044 -3.644387 14.0 -61.752045 292.941493 \n", + "4 1 49 -3.600852 -3.600852 3.0 -10.802555 38.898396 \n", + "5 2 32 -6.989444 -6.989444 1.0 -6.989444 48.852333 \n", + "6 2 33 -3.598820 -3.598820 5.0 -17.994100 64.757529 \n", + "7 2 39 1.537771 2.990041 144.0 330.521887 775.984783 \n", + "8 2 46 -6.989444 -3.598820 14.0 -63.945978 324.924388 \n", + "9 2 49 -3.600852 -3.600852 3.0 -10.802555 38.898396 \n", + "10 3 33 -3.552494 -3.552494 5.0 -17.762470 63.101069 \n", + "11 3 39 1.538094 2.992809 144.0 331.060698 778.564577 \n", + "12 3 46 -3.552494 -3.552494 10.0 -35.524940 126.202138 \n", + "13 3 49 -3.600852 -3.600852 3.0 -10.802555 38.898396 \n", + "14 4 32 -5.450309 -5.450309 1.0 -5.450309 29.705873 \n", + "15 4 33 -3.789458 -3.789458 5.0 -18.947291 71.799965 \n", + "16 4 39 1.536587 2.979840 144.0 328.526707 766.475481 \n", + "17 4 46 -5.450309 -3.789458 14.0 -59.695819 262.423421 \n", + "18 4 49 -3.600852 -3.600852 3.0 -10.802555 38.898396 \n", + "19 5 32 -5.314323 -5.314323 1.0 -5.314323 28.242029 \n", + "20 5 33 -3.829188 -3.829188 5.0 -19.145938 73.313386 \n", + "21 5 39 1.446871 2.867586 144.0 315.103022 706.317527 \n", + "22 5 46 -5.314323 -3.829188 14.0 -59.549167 259.594889 \n", + "23 5 49 -3.744196 -3.744196 3.0 -11.232589 42.057016 \n", + "24 6 33 -3.552494 -3.552494 5.0 -17.762470 63.101069 \n", + "25 6 39 1.538094 2.992809 144.0 331.060698 778.564577 \n", + "26 6 46 -3.552494 -3.552494 10.0 -35.524940 126.202138 \n", + "27 6 49 -3.600852 -3.600852 3.0 -10.802555 38.898396 \n", + "28 7 33 -3.552494 -3.552494 5.0 -17.762470 63.101069 \n", + "29 7 39 1.538094 2.992809 144.0 331.060698 778.564577 \n", + "... ... ... ... ... ... ... ... \n", + "132533 9997 86 -2.453469 -2.453469 1.0 -2.453469 6.019509 \n", + "132534 9997 88239 -2.453469 -2.453469 2.0 -4.906938 12.039018 \n", + "132535 9998 33 -2.453469 -2.453469 1.0 -2.453469 6.019509 \n", + "132536 9998 34 -2.453469 -2.453469 2.0 -4.906938 12.039018 \n", + "132537 9998 39 2.945579 4.403509 144.0 524.780066 1929.374209 \n", + "132538 9998 41 -2.453469 -2.453469 1.0 -2.453469 6.019509 \n", + "132539 9998 46 -2.453469 -2.453469 4.0 -9.813875 24.078036 \n", + "132540 9998 49 -2.453469 -2.453469 6.0 -14.720813 36.117054 \n", + "132541 9998 86 -2.453469 -2.453469 1.0 -2.453469 6.019509 \n", + "132542 9998 88239 -2.453469 -2.453469 2.0 -4.906938 12.039018 \n", + "132543 9999 33 -2.453469 -1.323128 4.0 -8.620076 19.499824 \n", + "132544 9999 34 -4.219398 -1.744331 4.0 -10.838938 32.730342 \n", + "132545 9999 39 2.336524 4.146807 144.0 461.038486 1501.348430 \n", + "132546 9999 41 -2.453469 -2.421740 4.0 -9.718689 23.613982 \n", + "132547 9999 43 -2.421740 -2.421740 1.0 -2.421740 5.864824 \n", + "132548 9999 44 -2.421740 -2.421740 1.0 -2.421740 5.864824 \n", + "132549 9999 46 -4.219398 -2.453469 8.0 -26.691466 95.291302 \n", + "132550 9999 49 -4.219398 -2.453469 9.0 -27.379006 89.527003 \n", + "132551 9999 86 -2.453469 -2.453469 1.0 -2.453469 6.019509 \n", + "132552 9999 88239 -2.453469 -2.453469 2.0 -4.906938 12.039018 \n", + "132553 10000 33 -2.453469 -0.470426 4.0 -6.061972 11.164573 \n", + "132554 10000 34 -2.914497 -1.223359 4.0 -8.160363 18.472290 \n", + "132555 10000 39 2.100500 4.289594 144.0 425.427173 1283.236348 \n", + "132556 10000 41 -2.453469 -1.569039 4.0 -7.160584 13.405154 \n", + "132557 10000 43 -1.569039 -1.569039 1.0 -1.569039 2.461882 \n", + "132558 10000 44 -1.569039 -1.569039 1.0 -1.569039 2.461882 \n", + "132559 10000 46 -2.914497 -2.453469 8.0 -21.471862 58.055200 \n", + "132560 10000 49 -2.914497 -2.453469 9.0 -23.464303 61.599927 \n", + "132561 10000 86 -2.453469 -2.453469 1.0 -2.453469 6.019509 \n", + "132562 10000 88239 -2.453469 -2.453469 2.0 -4.906938 12.039018 \n", + "\n", + " m3 m4 m5 m6 m7 \\\n", + "0 -253.280915 1602.519371 -10139.209808 64151.221753 -4.058876e+05 \n", + "1 -242.015683 881.998838 -3214.345196 11714.318207 -4.269151e+04 \n", + "2 1848.858798 4492.688692 11070.839492 27604.827865 6.952730e+04 \n", + "3 -1497.155026 8174.075159 -46985.529626 280033.523428 -1.708933e+06 \n", + "4 -140.067348 504.361727 -1816.131707 6539.620669 -2.354820e+04 \n", + "5 -341.450663 2386.550423 -16680.661453 116588.555547 -8.148892e+05 \n", + "6 -233.050692 838.707500 -3018.357364 10862.524979 -3.909227e+04 \n", + "7 1857.718921 4521.806060 11161.606520 27879.122074 7.033982e+04 \n", + "8 -1831.904037 11223.616691 -72759.360538 488079.272146 -3.337741e+06 \n", + "9 -140.067348 504.361727 -1816.131707 6539.620669 -2.354820e+04 \n", + "10 -224.166170 796.348978 -2829.024982 10050.094333 -3.570290e+04 \n", + "11 1867.141161 4552.831220 11258.511794 28172.548937 7.121076e+04 \n", + "12 -448.332340 1592.697956 -5658.049964 20100.188667 -7.140580e+04 \n", + "13 -140.067348 504.361727 -1816.131707 6539.620669 -2.354820e+04 \n", + "14 -161.906200 882.438887 -4809.564994 26213.617470 -1.428723e+05 \n", + "15 -272.082960 1031.046984 -3907.109377 14805.827395 -5.610606e+04 \n", + "16 1823.149170 4408.512711 10809.426550 26817.840282 6.720492e+04 \n", + "17 -1191.790718 5591.849518 -27052.478731 134466.124669 -6.837014e+05 \n", + "18 -140.067348 504.361727 -1816.131707 6539.620669 -2.354820e+04 \n", + "19 -150.087267 797.612217 -4238.768974 22526.187576 -1.197114e+05 \n", + "20 -280.730703 1074.970507 -4116.263661 15761.945491 -6.035545e+04 \n", + "21 1616.202173 3762.946495 8889.258084 21256.253121 5.135412e+04 \n", + "22 -1161.810472 5340.389882 -25187.603218 121628.641284 -5.995566e+05 \n", + "23 -157.469718 589.597519 -2207.568792 8265.570682 -3.094792e+04 \n", + "24 -224.166170 796.348978 -2829.024982 10050.094333 -3.570290e+04 \n", + "25 1867.141161 4552.831220 11258.511794 28172.548937 7.121076e+04 \n", + "26 -448.332340 1592.697956 -5658.049964 20100.188667 -7.140580e+04 \n", + "27 -140.067348 504.361727 -1816.131707 6539.620669 -2.354820e+04 \n", + "28 -224.166170 796.348978 -2829.024982 10050.094333 -3.570290e+04 \n", + "29 1867.141161 4552.831220 11258.511794 28172.548937 7.121076e+04 \n", + "... ... ... ... ... ... \n", + "132533 -14.768677 36.234488 -88.900185 218.113828 -5.351355e+02 \n", + "132534 -29.537355 72.468977 -177.800371 436.227656 -1.070271e+03 \n", + "132535 -14.768677 36.234488 -88.900185 218.113828 -5.351355e+02 \n", + "132536 -29.537355 72.468977 -177.800371 436.227656 -1.070271e+03 \n", + "132537 7154.115449 26746.073877 100782.539685 382634.399217 1.463212e+06 \n", + "132538 -14.768677 36.234488 -88.900185 218.113828 -5.351355e+02 \n", + "132539 -59.074709 144.937953 -355.600742 872.455313 -2.140542e+03 \n", + "132540 -88.612064 217.406930 -533.401113 1308.682969 -3.210813e+03 \n", + "132541 -14.768677 36.234488 -88.900185 218.113828 -5.351355e+02 \n", + "132542 -29.537355 72.468977 -177.800371 436.227656 -1.070271e+03 \n", + "132543 -45.491191 108.091651 -259.552480 626.934249 -1.519298e+03 \n", + "132544 -109.398491 396.846705 -1525.719895 6090.915444 -2.488246e+04 \n", + "132545 4968.007558 16687.960752 56846.694098 196182.214733 6.852638e+05 \n", + "132546 -57.377915 139.422981 -338.795878 823.296205 -2.000730e+03 \n", + "132547 -14.203079 34.396164 -83.298564 201.727459 -4.885314e+02 \n", + "132548 -14.203079 34.396164 -83.298564 201.727459 -4.885314e+02 \n", + "132549 -359.551796 1412.770269 -5705.089430 23444.075302 -9.737918e+04 \n", + "132550 -313.969879 1168.281167 -4545.517629 18237.397961 -7.463979e+04 \n", + "132551 -14.768677 36.234488 -88.900185 218.113828 -5.351355e+02 \n", + "132552 -29.537355 72.468977 -177.800371 436.227656 -1.070271e+03 \n", + "132553 -22.598358 48.405187 -107.942676 247.966919 -5.819642e+02 \n", + "132554 -45.218936 116.688164 -311.439652 849.275503 -2.348909e+03 \n", + "132555 3949.889456 12400.183359 39683.914847 129402.793631 4.297711e+05 \n", + "132556 -26.357039 54.417075 -117.429363 262.877207 -6.053709e+02 \n", + "132557 -3.862787 6.060862 -9.509726 14.921126 -2.341182e+01 \n", + "132558 -3.862787 6.060862 -9.509726 14.921126 -2.341182e+01 \n", + "132559 -158.101040 433.549868 -1196.759212 3324.008890 -9.285587e+03 \n", + "132560 -162.881812 433.865866 -1164.269966 3147.348152 -8.569596e+03 \n", + "132561 -14.768677 36.234488 -88.900185 218.113828 -5.351355e+02 \n", + "132562 -29.537355 72.468977 -177.800371 436.227656 -1.070271e+03 \n", + "\n", + " m8 \n", + "0 2.568068e+06 \n", + "1 1.555844e+05 \n", + "2 1.766393e+05 \n", + "3 1.058344e+07 \n", + "4 8.479358e+04 \n", + "5 5.695623e+06 \n", + "6 1.406861e+05 \n", + "7 1.790136e+05 \n", + "8 2.306386e+07 \n", + "9 8.479358e+04 \n", + "10 1.268343e+05 \n", + "11 1.815637e+05 \n", + "12 2.536687e+05 \n", + "13 8.479358e+04 \n", + "14 7.786984e+05 \n", + "15 2.126116e+05 \n", + "16 1.698787e+05 \n", + "17 3.540017e+06 \n", + "18 8.479358e+04 \n", + "19 6.361852e+05 \n", + "20 2.311123e+05 \n", + "21 1.251688e+05 \n", + "22 3.006966e+06 \n", + "23 1.158751e+05 \n", + "24 1.268343e+05 \n", + "25 1.815637e+05 \n", + "26 2.536687e+05 \n", + "27 8.479358e+04 \n", + "28 1.268343e+05 \n", + "29 1.815637e+05 \n", + "... ... \n", + "132533 1.312938e+03 \n", + "132534 2.625876e+03 \n", + "132535 1.312938e+03 \n", + "132536 2.625876e+03 \n", + "132537 5.633865e+06 \n", + "132538 1.312938e+03 \n", + "132539 5.251753e+03 \n", + "132540 7.877629e+03 \n", + "132541 1.312938e+03 \n", + "132542 2.625876e+03 \n", + "132543 3.688524e+03 \n", + "132544 1.030442e+05 \n", + "132545 2.420556e+06 \n", + "132546 4.862226e+03 \n", + "132547 1.183096e+03 \n", + "132548 1.183096e+03 \n", + "132549 4.071014e+05 \n", + "132550 3.092649e+05 \n", + "132551 1.312938e+03 \n", + "132552 2.625876e+03 \n", + "132553 1.386409e+03 \n", + "132554 6.560741e+03 \n", + "132555 1.453222e+06 \n", + "132556 1.423140e+03 \n", + "132557 3.673405e+01 \n", + "132558 3.673405e+01 \n", + "132559 2.607596e+04 \n", + "132560 2.349579e+04 \n", + "132561 1.312938e+03 \n", + "132562 2.625876e+03 \n", + "\n", + "[132563 rows x 13 columns]" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def moment(n):\n", + " def moment_(x):\n", + " return np.sum(np.power(x, n))\n", + " moment_.__name__ = 'm%s' % n\n", + " return moment_\n", + "\n", + "milan_cube = milan_data.groupby([\"Grid\", \"Country\"]).agg({metric: [\n", + " 'min',\n", + " 'max',\n", + " moment(0),\n", + " moment(1),\n", + " moment(2),\n", + " moment(3),\n", + " moment(4),\n", + " moment(5),\n", + " moment(6),\n", + " moment(7),\n", + " moment(8)\n", + "]}).reset_index(col_level=1)\n", + "milan_cube.columns = milan_cube.columns.get_level_values(1)\n", + "milan_cube" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "milan_cube.to_csv('lib/src/test/resources/milan_moments_cubed.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GridCountrycountoutliers1outliers5outliers10
013210.00.00.0
113350.00.00.0
21391440.00.00.0
3146140.00.00.0
414930.00.00.0
523210.00.00.0
623350.00.00.0
72391440.00.00.0
8246140.00.00.0
924930.00.00.0
1033350.00.00.0
113391440.00.00.0
12346100.00.00.0
1334930.00.00.0
1443210.00.00.0
1543350.00.00.0
164391440.00.00.0
17446140.00.00.0
1844930.00.00.0
1953210.00.00.0
2053350.00.00.0
215391440.00.00.0
22546140.00.00.0
2354930.00.00.0
2463350.00.00.0
256391440.00.00.0
26646100.00.00.0
2764930.00.00.0
2873350.00.00.0
297391440.00.00.0
.....................
13253399978610.00.00.0
13253499978823920.00.00.0
13253599983310.00.00.0
13253699983420.00.00.0
1325379998391440.00.00.0
13253899984110.00.00.0
13253999984640.00.00.0
13254099984960.00.00.0
13254199988610.00.00.0
13254299988823920.00.00.0
13254399993340.00.00.0
13254499993440.00.00.0
1325459999391440.00.00.0
13254699994140.00.00.0
13254799994310.00.00.0
13254899994410.00.00.0
13254999994680.00.00.0
13255099994990.00.00.0
13255199998610.00.00.0
13255299998823920.00.00.0
132553100003340.00.00.0
132554100003440.00.00.0
13255510000391440.00.00.0
132556100004140.00.00.0
132557100004310.00.00.0
132558100004410.00.00.0
132559100004680.00.00.0
132560100004990.00.00.0
132561100008610.00.00.0
132562100008823920.00.00.0
\n", + "

132563 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " Grid Country count outliers1 outliers5 outliers10\n", + "0 1 32 1 0.0 0.0 0.0\n", + "1 1 33 5 0.0 0.0 0.0\n", + "2 1 39 144 0.0 0.0 0.0\n", + "3 1 46 14 0.0 0.0 0.0\n", + "4 1 49 3 0.0 0.0 0.0\n", + "5 2 32 1 0.0 0.0 0.0\n", + "6 2 33 5 0.0 0.0 0.0\n", + "7 2 39 144 0.0 0.0 0.0\n", + "8 2 46 14 0.0 0.0 0.0\n", + "9 2 49 3 0.0 0.0 0.0\n", + "10 3 33 5 0.0 0.0 0.0\n", + "11 3 39 144 0.0 0.0 0.0\n", + "12 3 46 10 0.0 0.0 0.0\n", + "13 3 49 3 0.0 0.0 0.0\n", + "14 4 32 1 0.0 0.0 0.0\n", + "15 4 33 5 0.0 0.0 0.0\n", + "16 4 39 144 0.0 0.0 0.0\n", + "17 4 46 14 0.0 0.0 0.0\n", + "18 4 49 3 0.0 0.0 0.0\n", + "19 5 32 1 0.0 0.0 0.0\n", + "20 5 33 5 0.0 0.0 0.0\n", + "21 5 39 144 0.0 0.0 0.0\n", + "22 5 46 14 0.0 0.0 0.0\n", + "23 5 49 3 0.0 0.0 0.0\n", + "24 6 33 5 0.0 0.0 0.0\n", + "25 6 39 144 0.0 0.0 0.0\n", + "26 6 46 10 0.0 0.0 0.0\n", + "27 6 49 3 0.0 0.0 0.0\n", + "28 7 33 5 0.0 0.0 0.0\n", + "29 7 39 144 0.0 0.0 0.0\n", + "... ... ... ... ... ... ...\n", + "132533 9997 86 1 0.0 0.0 0.0\n", + "132534 9997 88239 2 0.0 0.0 0.0\n", + "132535 9998 33 1 0.0 0.0 0.0\n", + "132536 9998 34 2 0.0 0.0 0.0\n", + "132537 9998 39 144 0.0 0.0 0.0\n", + "132538 9998 41 1 0.0 0.0 0.0\n", + "132539 9998 46 4 0.0 0.0 0.0\n", + "132540 9998 49 6 0.0 0.0 0.0\n", + "132541 9998 86 1 0.0 0.0 0.0\n", + "132542 9998 88239 2 0.0 0.0 0.0\n", + "132543 9999 33 4 0.0 0.0 0.0\n", + "132544 9999 34 4 0.0 0.0 0.0\n", + "132545 9999 39 144 0.0 0.0 0.0\n", + "132546 9999 41 4 0.0 0.0 0.0\n", + "132547 9999 43 1 0.0 0.0 0.0\n", + "132548 9999 44 1 0.0 0.0 0.0\n", + "132549 9999 46 8 0.0 0.0 0.0\n", + "132550 9999 49 9 0.0 0.0 0.0\n", + "132551 9999 86 1 0.0 0.0 0.0\n", + "132552 9999 88239 2 0.0 0.0 0.0\n", + "132553 10000 33 4 0.0 0.0 0.0\n", + "132554 10000 34 4 0.0 0.0 0.0\n", + "132555 10000 39 144 0.0 0.0 0.0\n", + "132556 10000 41 4 0.0 0.0 0.0\n", + "132557 10000 43 1 0.0 0.0 0.0\n", + "132558 10000 44 1 0.0 0.0 0.0\n", + "132559 10000 46 8 0.0 0.0 0.0\n", + "132560 10000 49 9 0.0 0.0 0.0\n", + "132561 10000 86 1 0.0 0.0 0.0\n", + "132562 10000 88239 2 0.0 0.0 0.0\n", + "\n", + "[132563 rows x 6 columns]" + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def outliers(t, name):\n", + " def outliers_(x):\n", + " return np.sum(x >= t)\n", + " outliers_.__name__ = 'outliers%s' % name\n", + " return outliers_\n", + "\n", + "t1 = milan_data[metric].quantile(0.99)\n", + "t5 = milan_data[metric].quantile(0.95)\n", + "t10 = milan_data[metric].quantile(0.90)\n", + "milan_oracle = milan_data.groupby([\"Grid\", \"Country\"]).agg({metric: [\n", + " 'count',\n", + " outliers(t1, \"1\"),\n", + " outliers(t5, \"5\"),\n", + " outliers(t10, \"10\")\n", + "]}).reset_index(col_level=1)\n", + "milan_oracle.columns = milan_oracle.columns.get_level_values(1)\n", + "milan_oracle" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "milan_oracle.to_csv('lib/src/test/resources/milan_oracle_cubed.csv')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wikipedia" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [], + "source": [ + "wiki_data = pd.read_json('~/Downloads/wikipedia-2015-09-12', lines=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": {}, + "outputs": [], + "source": [ + "metric = \"delta\"\n", + "attributes = [\"channel\", \"countryName\", \"namespace\"]\n", + "wiki_data = wiki_data[attributes + [metric]]\n", + "wiki_data = wiki_data[(np.isfinite(wiki_data[metric])) & (wiki_data[metric] != 0)]\n", + "wiki_data[metric] = np.log(np.absolute(wiki_data[metric]))" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 149, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFf1JREFUeJzt3X+w3XWd3/Hna4msLK4GVnuHJkzDjBkdFqriHYi13bnK\nigEdwx+ug0MlWGr+EK3bYUZDOx2m/uiw03ZdaV07GckStlSWYXXICBoz0TNOZ4oCakFAyy2iJA3g\nGn5spKvN7rt/nE/wmO+9ueee3JtzLnk+Zs7c7/f9/Xy+9/PhhPO63x/nnFQVkiQN+o1xD0CSNHkM\nB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6Vo17AKN65StfWevWrRup789//nNO\nPfXUpR3QceYcJoNzmAzOYTj33XffX1XVq4Zpu2LDYd26ddx7770j9e31eszMzCztgI4z5zAZnMNk\ncA7DSfLjYdt6WkmS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktSxYt8hraW3\nbuudLyw/dv07xjgSSePmkYMkqcNwkCR1GA6SpI6hwiHJ6iS3J/lBkoeTvCnJ6Ul2J3mk/TyttU2S\nG5LMJrk/yXkD+9nc2j+SZPNA/Y1JHmh9bkiSpZ+qJGlYwx45fAb4alW9Fngd8DCwFdhTVeuBPW0d\n4GJgfXtsAT4HkOR04DrgAuB84LrDgdLafGCg38Zjm5Yk6VgsGA5JXgH8HnAjQFX9sqqeATYBO1qz\nHcClbXkTcHP13Q2sTnIG8HZgd1UdqKqngd3Axrbt5VV1d1UVcPPAviRJYzDMkcNZwE+BP0vy3SSf\nT3IqMFVV+1ubJ4CptrwGeHyg/95WO1p97xx1SdKYDPM+h1XAecCHq+pbST7Dr04hAVBVlaSWY4CD\nkmyhf6qKqakper3eSPs5ePDgyH0nxXLM4ZpzD72wfDz++/g8TAbnMBkmbQ7DhMNeYG9Vfaut304/\nHJ5MckZV7W+nhp5q2/cBZw70X9tq+4CZI+q9Vl87R/uOqtoGbAOYnp6uUb9Sz68UnNuVg2+Cu3xp\n9z0Xn4fJ4Bwmw6TNYcHTSlX1BPB4kte00oXAQ8BO4PAdR5uBO9ryTuCKdtfSBuDZdvppF3BRktPa\nheiLgF1t23NJNrS7lK4Y2JckaQyG/fiMDwO3JDkZeBR4P/1guS3JVcCPgfe0tncBlwCzwPOtLVV1\nIMkngHtau49X1YG2/EHgJuAU4CvtIUkak6HCoaq+B0zPsenCOdoWcPU8+9kObJ+jfi9wzjBjkSQt\nP98hLUnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1\nGA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUsdQ\n4ZDksSQPJPlekntb7fQku5M80n6e1upJckOS2ST3JzlvYD+bW/tHkmweqL+x7X+29c1ST1SSNLzF\nHDm8papeX1XTbX0rsKeq1gN72jrAxcD69tgCfA76YQJcB1wAnA9cdzhQWpsPDPTbOPKMJEnH7FhO\nK20CdrTlHcClA/Wbq+9uYHWSM4C3A7ur6kBVPQ3sBja2bS+vqrurqoCbB/YlSRqDYcOhgK8luS/J\nllabqqr9bfkJYKotrwEeH+i7t9WOVt87R12SNCarhmz3j6tqX5K/B+xO8oPBjVVVSWrph/frWjBt\nAZiamqLX6420n4MHD47cd1IsxxyuOffQC8vH47+Pz8NkcA6TYdLmMFQ4VNW+9vOpJF+if83gySRn\nVNX+dmroqdZ8H3DmQPe1rbYPmDmi3mv1tXO0n2sc24BtANPT0zUzMzNXswX1ej1G7TsplmMOV269\n84Xlxy5f2n3PxedhMjiHyTBpc1jwtFKSU5P89uFl4CLg+8BO4PAdR5uBO9ryTuCKdtfSBuDZdvpp\nF3BRktPaheiLgF1t23NJNrS7lK4Y2JckaQyGOXKYAr7U7i5dBfy3qvpqknuA25JcBfwYeE9rfxdw\nCTALPA+8H6CqDiT5BHBPa/fxqjrQlj8I3AScAnylPSRJY7JgOFTVo8Dr5qj/DLhwjnoBV8+zr+3A\n9jnq9wLnDDFeSdJx4DukJUkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaD\nJKnDcJAkdQz7fQ7SUa0b/Ljv698xxpFIWgoeOUiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6S\npA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHUOHQ5KTknw3yZfb+llJvpVkNslfJDm51X+zrc+27esG\n9nFtq/8wydsH6htbbTbJ1qWbniRpFIs5cvgI8PDA+h8Bn66qVwNPA1e1+lXA063+6daOJGcDlwG/\nC2wE/rQFzknAZ4GLgbOB97a2kqQxGSockqwF3gF8vq0HeCtwe2uyA7i0LW9q67TtF7b2m4Bbq+oX\nVfUjYBY4vz1mq+rRqvolcGtrK0kak2GPHP4E+Cjwd239d4BnqupQW98LrGnLa4DHAdr2Z1v7F+pH\n9JmvLkkakwW/7CfJO4Gnquq+JDPLP6SjjmULsAVgamqKXq830n4OHjw4ct9JsRxzuObcQy8sL3bf\no/T1eZgMzmEyTNochvkmuDcD70pyCfBS4OXAZ4DVSVa1o4O1wL7Wfh9wJrA3ySrgFcDPBuqHDfaZ\nr/5rqmobsA1genq6ZmZmhhh+V6/XY9S+k2I55nDl4Le5Xb64fY/S1+dhMjiHyTBpc1jwtFJVXVtV\na6tqHf0Lyl+vqsuBbwDvbs02A3e05Z1tnbb961VVrX5Zu5vpLGA98G3gHmB9u/vp5PY7di7J7CRJ\nIzmW75D+GHBrkk8C3wVubPUbgT9PMgscoP9iT1U9mOQ24CHgEHB1Vf0tQJIPAbuAk4DtVfXgMYxL\nknSMFhUOVdUDem35Ufp3Gh3Z5m+AP5in/6eAT81Rvwu4azFjkSQtH98hLUnqMBwkSR2GgySpw3CQ\nJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lS\nh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqWDAckrw0ybeT/M8kDyb5\nt61+VpJvJZlN8hdJTm7132zrs237uoF9XdvqP0zy9oH6xlabTbJ16acpSVqMYY4cfgG8tapeB7we\n2JhkA/BHwKer6tXA08BVrf1VwNOt/unWjiRnA5cBvwtsBP40yUlJTgI+C1wMnA28t7WVJI3JguFQ\nfQfb6kvao4C3Are3+g7g0ra8qa3Ttl+YJK1+a1X9oqp+BMwC57fHbFU9WlW/BG5tbSVJY7JqmEbt\nr/v7gFfT/yv/fwPPVNWh1mQvsKYtrwEeB6iqQ0meBX6n1e8e2O1gn8ePqF8wzzi2AFsApqam6PV6\nwwy/4+DBgyP3nRTLMYdrzj30wvJi9z1KX5+HyeAcJsOkzWGocKiqvwVen2Q18CXgtcs6qvnHsQ3Y\nBjA9PV0zMzMj7afX6zFq30mxHHO4cuudLyw/dvni9j1KX5+HyeAcJsOkzWFRdytV1TPAN4A3AauT\nHA6XtcC+trwPOBOgbX8F8LPB+hF95qtLksZkmLuVXtWOGEhyCvA24GH6IfHu1mwzcEdb3tnWadu/\nXlXV6pe1u5nOAtYD3wbuAda3u59Opn/ReudSTE6SNJphTiudAexo1x1+A7itqr6c5CHg1iSfBL4L\n3Nja3wj8eZJZ4AD9F3uq6sEktwEPAYeAq9vpKpJ8CNgFnARsr6oHl2yGkqRFWzAcqup+4A1z1B+l\nf6fRkfW/Af5gnn19CvjUHPW7gLuGGK8k6TjwHdKSpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaD\nJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiS\nOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1LBgOSc5M8o0kDyV5MMlHWv30JLuTPNJ+ntbqSXJDktkk\n9yc5b2Bfm1v7R5JsHqi/MckDrc8NSbIck5UkDWeYI4dDwDVVdTawAbg6ydnAVmBPVa0H9rR1gIuB\n9e2xBfgc9MMEuA64ADgfuO5woLQ2Hxjot/HYpyZJGtWC4VBV+6vqO235r4GHgTXAJmBHa7YDuLQt\nbwJurr67gdVJzgDeDuyuqgNV9TSwG9jYtr28qu6uqgJuHtiXJGkM0n89HrJxsg74JnAO8JOqWt3q\nAZ6uqtVJvgxcX1X/vW3bA3wMmAFeWlWfbPV/A/xfoNfa/36r/xPgY1X1zjl+/xb6RyNMTU298dZb\nb138jIGDBw/yspe9bKS+k2I55vDAvmdfWD53zSuWva/Pw2RwDpPheMzhLW95y31VNT1M21XD7jTJ\ny4C/BP6wqp4bvCxQVZVk+JQZUVVtA7YBTE9P18zMzEj76fV6jNp3UizHHK7ceucLy49dvrh9j9LX\n52EyOIfJMGlzGOpupSQvoR8Mt1TVF1v5yXZKiPbzqVbfB5w50H1tqx2tvnaOuiRpTIa5WynAjcDD\nVfXHA5t2AofvONoM3DFQv6LdtbQBeLaq9gO7gIuSnNYuRF8E7Grbnkuyof2uKwb2JUkag2FOK70Z\neB/wQJLvtdq/Aq4HbktyFfBj4D1t213AJcAs8DzwfoCqOpDkE8A9rd3Hq+pAW/4gcBNwCvCV9pAk\njcmC4dAuLM/3voML52hfwNXz7Gs7sH2O+r30L3JLkiaA75CWJHUYDpKkDsNBktRhOEiSOgwHSVKH\n4SBJ6jAcJEkdJ2Q4PLDvWdZtvZN1A58HJEn6lRMyHCRJR2c4SJI6DAdJUofhIEnqMBwkSR2GgySp\nw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6SpI4FwyHJ9iRPJfn+\nQO30JLuTPNJ+ntbqSXJDktkk9yc5b6DP5tb+kSSbB+pvTPJA63NDkiz1JHV8Hf4iJb9MSVq5hjly\nuAnYeERtK7CnqtYDe9o6wMXA+vbYAnwO+mECXAdcAJwPXHc4UFqbDwz0O/J3SZKOs1ULNaiqbyZZ\nd0R5EzDTlncAPeBjrX5zVRVwd5LVSc5obXdX1QGAJLuBjUl6wMur6u5Wvxm4FPjKsUxqOQ3+NfzY\n9e8Y40gkafmMes1hqqr2t+UngKm2vAZ4fKDd3lY7Wn3vHHVJ0hgteOSwkKqqJLUUg1lIki30T1cx\nNTVFr9cbaT9Tp8A15x4CWPQ+Dvcbpe9SOnjw4JL//mOZ22DfQUfbz3LM4XhzDpPBOSy9UcPhySRn\nVNX+dtroqVbfB5w50G5tq+3jV6ehDtd7rb52jvZzqqptwDaA6enpmpmZma/pUf2nW+7gPz7Qn/pj\nly9uH1cOnlZaZN+l1Ov1GHX+8zmWuV05z8Xno+1nOeZwvDmHyeAclt6op5V2AofvONoM3DFQv6Ld\ntbQBeLadftoFXJTktHYh+iJgV9v2XJIN7S6lKwb2JUkakwWPHJJ8gf5f/a9Mspf+XUfXA7cluQr4\nMfCe1vwu4BJgFngeeD9AVR1I8gngntbu44cvTgMfpH9H1Cn0L0RP7MXolcAL5pKWwjB3K713nk0X\nztG2gKvn2c92YPsc9XuBcxYahyaP72OQXrx8h7QkqcNwkCR1HPOtrBqN1wYkTTLDQceNgSitHJ5W\nWoHWbb2TB/Y9uyI+3G6+D+EbnIOkyeORg+bkX/nSic1wOAEt9oXfoJBOPIbDMlhJL6ae1pE0F8NB\ni2KYSCcGw2GCLeURyCS/qK+kIy3pROHdSpKkDo8cNFHmO4pYrqMoj1SkuRkOmljznQobJkCO3CZp\ncQwHrWhHu5YyyddZpElnOEwAT3OMzzAB4nOiE5Hh8CLgX8jLa77/voaGXsy8W0kakZ8PpRczjxxW\niFFegHzROn48NagXG48cJEkdHjlIS8yjCL0YGA7SMvJuKK1UhoM0Zou9NmSY6HgwHKQV5nh8xIhk\nOEgrmHekablMTDgk2Qh8BjgJ+HxVXT/mIUkr1tGOIpbqCMMjlRe3iQiHJCcBnwXeBuwF7kmys6oe\nGu/IpJVvoc+fuubcQ1y59U5f4PVrJiIcgPOB2ap6FCDJrcAmwHCQjhNPUWnQpITDGuDxgfW9wAVj\nGoukRVqOYPFIZrxSVeMeA0neDWysqn/e1t8HXFBVHzqi3RZgS1t9DfDDEX/lK4G/GrHvpHAOk8E5\nTAbnMJx/UFWvGqbhpBw57APOHFhf22q/pqq2AduO9Zclubeqpo91P+PkHCaDc5gMzmHpTcpnK90D\nrE9yVpKTgcuAnWMekySdsCbiyKGqDiX5ELCL/q2s26vqwTEPS5JOWBMRDgBVdRdw13H6dcd8amoC\nOIfJ4Bwmg3NYYhNxQVqSNFkm5ZqDJGmCnFDhkGRjkh8mmU2yddzjWawkZyb5RpKHkjyY5CPjHtOo\nkpyU5LtJvjzusYwqyeoktyf5QZKHk7xp3GNarCT/sv1b+n6SLyR56bjHtJAk25M8leT7A7XTk+xO\n8kj7edo4x7iQeebw79u/pfuTfCnJ6nGO8YQJh4GP6LgYOBt4b5KzxzuqRTsEXFNVZwMbgKtX4BwO\n+wjw8LgHcYw+A3y1ql4LvI4VNp8ka4B/AUxX1Tn0bwa5bLyjGspNwMYjaluBPVW1HtjT1ifZTXTn\nsBs4p6r+IfC/gGuP96AGnTDhwMBHdFTVL4HDH9GxYlTV/qr6Tlv+a/ovRmvGO6rFS7IWeAfw+XGP\nZVRJXgH8HnAjQFX9sqqeGe+oRrIKOCXJKuC3gP8z5vEsqKq+CRw4orwJ2NGWdwCXHtdBLdJcc6iq\nr1XVobZ6N/33e43NiRQOc31Ex4p7YT0syTrgDcC3xjuSkfwJ8FHg78Y9kGNwFvBT4M/a6bHPJzl1\n3INajKraB/wH4CfAfuDZqvraeEc1sqmq2t+WnwCmxjmYJfDPgK+McwAnUji8aCR5GfCXwB9W1XPj\nHs9iJHkn8FRV3TfusRyjVcB5wOeq6g3Az5n8Uxm/pp2X30Q/6P4+cGqSfzreUR276t+CuWJvw0zy\nr+mfQr5lnOM4kcJhqI/omHRJXkI/GG6pqi+OezwjeDPwriSP0T+199Yk/3W8QxrJXmBvVR0+crud\nflisJL8P/KiqflpV/w/4IvCPxjymUT2Z5AyA9vOpMY9nJEmuBN4JXF5jfp/BiRQOK/4jOpKE/jnu\nh6vqj8c9nlFU1bVVtbaq1tF/Dr5eVSvur9WqegJ4PMlrWulCVt5HzP8E2JDkt9q/rQtZYRfVB+wE\nNrflzcAdYxzLSNoXnn0UeFdVPT/u8Zww4dAu9Bz+iI6HgdtW4Ed0vBl4H/2/tr/XHpeMe1AnsA8D\ntyS5H3g98O/GPJ5FaUc9twPfAR6g/3owUe/SnUuSLwD/A3hNkr1JrgKuB96W5BH6R0QT/U2S88zh\nPwO/Dexu/2//l7GO0XdIS5KOdMIcOUiShmc4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKk\njv8PKv7S972dhRMAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "wiki_data[\"delta\"].hist(bins=100)" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
channelcountryNamenamespaceminmaxm0m1m2m3m4m5m6m7m8
0#ar.wikipediaAlgeriaMain0.0000006.66695718.057.644747234.8847851108.0079785842.0839793.325478e+041.988641e+051.226715e+067.717880e+06
1#ar.wikipediaAlgeriaنقاش5.2678585.2678581.05.26785827.750330146.184800770.0807924.056676e+032.137000e+041.125741e+055.930244e+05
2#ar.wikipediaAustraliaMain5.5645205.5645201.05.56452030.963887172.299183958.7623215.335052e+032.968701e+041.651940e+059.192252e+05
3#ar.wikipediaBahrainMain3.7841903.7841901.03.78419014.32009154.189941205.0650127.760049e+022.936550e+031.111246e+044.205166e+04
4#ar.wikipediaBelgiumMain5.4595865.4595861.05.45958629.807074162.734269888.4616604.850632e+032.648244e+041.445832e+057.893641e+05
5#ar.wikipediaDenmarkويكيبيديا6.0306856.0306851.06.03068536.369165219.3309861322.7161427.976885e+034.810608e+042.901126e+051.749578e+06
6#ar.wikipediaEgyptMain0.6931478.66406028.0124.659483680.8941254142.01872026951.8032941.839678e+051.302402e+069.492862e+067.087854e+07
7#ar.wikipediaHashemite Kingdom of JordanMain0.6931475.1647868.019.51721159.503557216.944287908.0317064.171502e+032.025181e+041.013195e+055.148902e+05
8#ar.wikipediaHashemite Kingdom of Jordanنقاش المستخدم4.1271344.1271341.04.12713417.03323870.298463290.1312051.197410e+034.941874e+032.039578e+048.417612e+04
9#ar.wikipediaIranMain0.6931470.6931471.00.6931470.4804530.3330250.2308351.600027e-011.109054e-017.687378e-025.328484e-02
10#ar.wikipediaIraqMain0.0000006.52062119.061.367666287.3481811524.9356218645.0854035.101748e+043.091354e+051.907096e+061.191112e+07
11#ar.wikipediaIraqنقاش5.9269266.2785212.012.20544774.548283455.7019922787.9312461.707021e+041.046042e+056.415184e+053.937453e+06
12#ar.wikipediaIsraelMain0.0000005.4424184.013.75749564.504332308.8495821507.5222677.485986e+033.773624e+041.926923e+059.947230e+05
13#ar.wikipediaKuwaitMain0.6931475.4722719.027.385355112.181294525.6303622608.2142371.329461e+046.875724e+043.588217e+051.884294e+06
14#ar.wikipediaKuwaitنقاش المستخدم6.2728776.2728771.06.27287739.348986246.8313491548.3426949.712563e+036.092571e+043.821795e+052.397365e+06
15#ar.wikipediaLebanonMain5.2364425.9964522.011.23289463.377762359.2019902044.8115141.169018e+046.710735e+043.867371e+052.237002e+06
16#ar.wikipediaLibyaMain4.4886367.4725013.019.227964128.792904891.4256476312.4042064.538466e+043.295322e+052.407739e+061.766220e+07
17#ar.wikipediaMoroccoMain0.0000007.75405313.045.491987239.3203051502.90043110204.6948097.174515e+045.133805e+053.712545e+062.704703e+07
18#ar.wikipediaMoroccoبوابة5.5214615.5214611.05.52146130.486531168.330188929.4285525.131803e+032.833505e+041.564509e+058.638374e+05
19#ar.wikipediaMoroccoقالب4.3820274.3820271.04.38202719.20215784.144365368.7228501.615753e+037.080274e+033.102595e+041.359565e+05
20#ar.wikipediaMoroccoنقاش6.6669576.6669571.06.66695744.448313296.3349811975.6525171.317159e+048.781442e+045.854550e+053.903203e+06
21#ar.wikipediaMoroccoنقاش المستخدم5.6454475.6454471.05.64544731.871071179.9264371015.7651465.734448e+033.237352e+041.827630e+051.031779e+06
22#ar.wikipediaMoroccoويكيبيديا8.7119378.7119371.08.71193775.897851661.2173165760.4837815.018497e+044.372083e+053.808932e+063.318317e+07
23#ar.wikipediaOmanMain1.3862943.8066622.05.19295716.41249157.825323213.6731488.044423e+023.049848e+031.159256e+044.410515e+04
24#ar.wikipediaPalestineMain0.0000003.4657364.06.93147224.02265183.256163288.5438731.000017e+033.465794e+031.201153e+044.162878e+04
25#ar.wikipediaQatarMain2.1972254.8040213.09.48615234.081175136.821624594.0579052.704686e+031.264015e+045.988422e+042.856838e+05
26#ar.wikipediaSaudi ArabiaMain0.0000008.31139897.0308.7907381375.6009107038.33958439448.0363222.366367e+051.497962e+069.907004e+066.793387e+07
27#ar.wikipediaSaudi Arabiaمستخدم3.3672963.3672961.03.36729611.33868138.180694128.5656924.329187e+021.457765e+034.908727e+031.652914e+04
28#ar.wikipediaSaudi Arabiaويكيبيديا3.6109183.6109181.03.61091813.03872847.081777170.0084326.138865e+022.216694e+038.004299e+032.890287e+04
29#ar.wikipediaSudanMain4.4773374.9767342.09.45407144.814424213.0182721015.3117824.852247e+032.324979e+041.116849e+055.378129e+05
.............................................
994#zh.wikipediaChinaMain1.0986126.68959922.080.418326342.5176851597.8340837929.1124354.128033e+042.237338e+051.255615e+067.263910e+06
995#zh.wikipediaChinaWikipedia1.6094386.0450054.019.648868111.067428656.4989703929.6372042.360291e+041.419026e+058.533656e+055.132418e+06
996#zh.wikipediaCzech RepublicMain1.0986124.8598123.09.56934337.863454163.185710729.2645303.326287e+031.539243e+047.202931e+043.400451e+05
997#zh.wikipediaFinlandMain3.3322053.3322051.03.33220511.10358736.999422123.2896424.108263e+021.368957e+034.561646e+031.520034e+04
998#zh.wikipediaFranceMain3.6635625.1239642.08.78752639.676691183.700876869.4669854.192038e+032.051605e+041.015925e+055.076205e+05
999#zh.wikipediaGermanyMain3.1354943.1354941.03.1354949.83132430.82605996.6549313.030610e+029.502459e+022.979491e+039.342176e+03
1000#zh.wikipediaHong KongMain0.0000009.431322440.01541.1997426864.10377534431.390925189571.3545391.133382e+067.309490e+065.048145e+073.699213e+08
1001#zh.wikipediaHong KongTemplate2.0794423.5835192.05.66296017.16568555.009811183.6045396.298276e+022.198520e+037.756832e+032.754389e+04
1002#zh.wikipediaIsraelMain6.2653016.2653011.06.26530139.253999245.9381291540.8764609.654055e+036.048556e+043.789603e+052.374300e+06
1003#zh.wikipediaItalyMain5.2470245.2470241.05.24702427.531262144.457192757.9703663.977089e+032.086788e+041.094943e+055.745191e+05
1004#zh.wikipediaJapanMain0.0000008.80522512.051.071233279.2365251743.36715412017.8736048.883793e+046.890841e+055.526233e+064.537615e+07
1005#zh.wikipediaJapanTalk3.8066623.8066621.03.80666214.49067955.161125209.9797877.993222e+023.042750e+031.158272e+044.409151e+04
1006#zh.wikipediaMacaoMain1.3862946.29526612.048.543764228.3448491169.9633356287.6314183.476625e+041.958443e+051.118023e+066.448414e+06
1007#zh.wikipediaMacaoWikipedia5.3706385.3706381.05.37063828.843753154.909356831.9620774.468167e+032.399691e+041.288787e+056.921609e+05
1008#zh.wikipediaMalaysiaMain0.0000006.10924845.0127.018209459.1996561821.1409377705.1503443.435583e+041.604540e+057.817490e+053.958450e+06
1009#zh.wikipediaNew ZealandMain2.4849073.9512442.06.43615021.78708877.031814281.8724261.057839e+034.040852e+031.562117e+046.086522e+04
1010#zh.wikipediaPortugalMain2.4849075.1873862.07.67229233.083733154.930922762.2204213.850892e+031.972002e+041.016591e+055.257640e+05
1011#zh.wikipediaRepublic of KoreaMain3.0910423.0910421.03.0910429.55454329.53349991.2893012.821791e+028.722276e+022.696093e+038.333736e+03
1012#zh.wikipediaSingaporeMain1.3862944.98360712.032.615067106.962952399.0854201608.9950056.796709e+032.960535e+041.318929e+055.982321e+05
1013#zh.wikipediaTaiwanFile0.6931475.3981633.07.18992230.827563158.962321850.8365184.585605e+032.474621e+041.335760e+057.210561e+05
1014#zh.wikipediaTaiwanMain0.0000008.348775659.02209.9134549248.88059143555.882096222371.0875421.205020e+066.842703e+064.039094e+072.465605e+08
1015#zh.wikipediaTaiwanTalk2.5649496.9847168.033.299343158.796546848.4365674912.1874782.994611e+041.885966e+051.213369e+067.924473e+06
1016#zh.wikipediaTaiwanTemplate2.3025854.1271342.06.42971922.33513682.506535318.2413281.262136e+035.090911e+032.073895e+048.496629e+04
1017#zh.wikipediaTaiwanUser3.5553483.5553481.03.55534812.64050044.941377159.7822365.680815e+022.019727e+037.180834e+032.553036e+04
1018#zh.wikipediaTaiwanUser talk6.0282796.0282791.06.02827936.340142219.0684971320.6059157.960980e+034.799101e+042.893032e+051.744000e+06
1019#zh.wikipediaTaiwanWikipedia5.8435447.0596187.045.332490294.7324101923.80860712607.0441898.294237e+045.478200e+053.632255e+062.417480e+07
1020#zh.wikipediaUnited KingdomMain2.5649492.7725892.05.33753814.26621338.188290102.3765682.748609e+027.390245e+021.989885e+035.365475e+03
1021#zh.wikipediaUnited StatesMain0.0000007.19893139.0129.906500595.1102903138.03526117972.9644911.084415e+056.770740e+054.328078e+062.814110e+07
1022#zh.wikipediaUnited StatesTemplate2.9957322.9957321.02.9957328.97441226.88493580.5400682.412765e+027.227997e+022.165315e+036.486703e+03
1023#zh.wikipediaVietnamMain2.0794425.2832045.018.00348873.970915332.1140261572.0943107.672361e+033.813046e+041.916971e+059.712955e+05
\n", + "

1024 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " channel countryName namespace min \\\n", + "0 #ar.wikipedia Algeria Main 0.000000 \n", + "1 #ar.wikipedia Algeria نقاش 5.267858 \n", + "2 #ar.wikipedia Australia Main 5.564520 \n", + "3 #ar.wikipedia Bahrain Main 3.784190 \n", + "4 #ar.wikipedia Belgium Main 5.459586 \n", + "5 #ar.wikipedia Denmark ويكيبيديا 6.030685 \n", + "6 #ar.wikipedia Egypt Main 0.693147 \n", + "7 #ar.wikipedia Hashemite Kingdom of Jordan Main 0.693147 \n", + "8 #ar.wikipedia Hashemite Kingdom of Jordan نقاش المستخدم 4.127134 \n", + "9 #ar.wikipedia Iran Main 0.693147 \n", + "10 #ar.wikipedia Iraq Main 0.000000 \n", + "11 #ar.wikipedia Iraq نقاش 5.926926 \n", + "12 #ar.wikipedia Israel Main 0.000000 \n", + "13 #ar.wikipedia Kuwait Main 0.693147 \n", + "14 #ar.wikipedia Kuwait نقاش المستخدم 6.272877 \n", + "15 #ar.wikipedia Lebanon Main 5.236442 \n", + "16 #ar.wikipedia Libya Main 4.488636 \n", + "17 #ar.wikipedia Morocco Main 0.000000 \n", + "18 #ar.wikipedia Morocco بوابة 5.521461 \n", + "19 #ar.wikipedia Morocco قالب 4.382027 \n", + "20 #ar.wikipedia Morocco نقاش 6.666957 \n", + "21 #ar.wikipedia Morocco نقاش المستخدم 5.645447 \n", + "22 #ar.wikipedia Morocco ويكيبيديا 8.711937 \n", + "23 #ar.wikipedia Oman Main 1.386294 \n", + "24 #ar.wikipedia Palestine Main 0.000000 \n", + "25 #ar.wikipedia Qatar Main 2.197225 \n", + "26 #ar.wikipedia Saudi Arabia Main 0.000000 \n", + "27 #ar.wikipedia Saudi Arabia مستخدم 3.367296 \n", + "28 #ar.wikipedia Saudi Arabia ويكيبيديا 3.610918 \n", + "29 #ar.wikipedia Sudan Main 4.477337 \n", + "... ... ... ... ... \n", + "994 #zh.wikipedia China Main 1.098612 \n", + "995 #zh.wikipedia China Wikipedia 1.609438 \n", + "996 #zh.wikipedia Czech Republic Main 1.098612 \n", + "997 #zh.wikipedia Finland Main 3.332205 \n", + "998 #zh.wikipedia France Main 3.663562 \n", + "999 #zh.wikipedia Germany Main 3.135494 \n", + "1000 #zh.wikipedia Hong Kong Main 0.000000 \n", + "1001 #zh.wikipedia Hong Kong Template 2.079442 \n", + "1002 #zh.wikipedia Israel Main 6.265301 \n", + "1003 #zh.wikipedia Italy Main 5.247024 \n", + "1004 #zh.wikipedia Japan Main 0.000000 \n", + "1005 #zh.wikipedia Japan Talk 3.806662 \n", + "1006 #zh.wikipedia Macao Main 1.386294 \n", + "1007 #zh.wikipedia Macao Wikipedia 5.370638 \n", + "1008 #zh.wikipedia Malaysia Main 0.000000 \n", + "1009 #zh.wikipedia New Zealand Main 2.484907 \n", + "1010 #zh.wikipedia Portugal Main 2.484907 \n", + "1011 #zh.wikipedia Republic of Korea Main 3.091042 \n", + "1012 #zh.wikipedia Singapore Main 1.386294 \n", + "1013 #zh.wikipedia Taiwan File 0.693147 \n", + "1014 #zh.wikipedia Taiwan Main 0.000000 \n", + "1015 #zh.wikipedia Taiwan Talk 2.564949 \n", + "1016 #zh.wikipedia Taiwan Template 2.302585 \n", + "1017 #zh.wikipedia Taiwan User 3.555348 \n", + "1018 #zh.wikipedia Taiwan User talk 6.028279 \n", + "1019 #zh.wikipedia Taiwan Wikipedia 5.843544 \n", + "1020 #zh.wikipedia United Kingdom Main 2.564949 \n", + "1021 #zh.wikipedia United States Main 0.000000 \n", + "1022 #zh.wikipedia United States Template 2.995732 \n", + "1023 #zh.wikipedia Vietnam Main 2.079442 \n", + "\n", + " max m0 m1 m2 m3 m4 \\\n", + "0 6.666957 18.0 57.644747 234.884785 1108.007978 5842.083979 \n", + "1 5.267858 1.0 5.267858 27.750330 146.184800 770.080792 \n", + "2 5.564520 1.0 5.564520 30.963887 172.299183 958.762321 \n", + "3 3.784190 1.0 3.784190 14.320091 54.189941 205.065012 \n", + "4 5.459586 1.0 5.459586 29.807074 162.734269 888.461660 \n", + "5 6.030685 1.0 6.030685 36.369165 219.330986 1322.716142 \n", + "6 8.664060 28.0 124.659483 680.894125 4142.018720 26951.803294 \n", + "7 5.164786 8.0 19.517211 59.503557 216.944287 908.031706 \n", + "8 4.127134 1.0 4.127134 17.033238 70.298463 290.131205 \n", + "9 0.693147 1.0 0.693147 0.480453 0.333025 0.230835 \n", + "10 6.520621 19.0 61.367666 287.348181 1524.935621 8645.085403 \n", + "11 6.278521 2.0 12.205447 74.548283 455.701992 2787.931246 \n", + "12 5.442418 4.0 13.757495 64.504332 308.849582 1507.522267 \n", + "13 5.472271 9.0 27.385355 112.181294 525.630362 2608.214237 \n", + "14 6.272877 1.0 6.272877 39.348986 246.831349 1548.342694 \n", + "15 5.996452 2.0 11.232894 63.377762 359.201990 2044.811514 \n", + "16 7.472501 3.0 19.227964 128.792904 891.425647 6312.404206 \n", + "17 7.754053 13.0 45.491987 239.320305 1502.900431 10204.694809 \n", + "18 5.521461 1.0 5.521461 30.486531 168.330188 929.428552 \n", + "19 4.382027 1.0 4.382027 19.202157 84.144365 368.722850 \n", + "20 6.666957 1.0 6.666957 44.448313 296.334981 1975.652517 \n", + "21 5.645447 1.0 5.645447 31.871071 179.926437 1015.765146 \n", + "22 8.711937 1.0 8.711937 75.897851 661.217316 5760.483781 \n", + "23 3.806662 2.0 5.192957 16.412491 57.825323 213.673148 \n", + "24 3.465736 4.0 6.931472 24.022651 83.256163 288.543873 \n", + "25 4.804021 3.0 9.486152 34.081175 136.821624 594.057905 \n", + "26 8.311398 97.0 308.790738 1375.600910 7038.339584 39448.036322 \n", + "27 3.367296 1.0 3.367296 11.338681 38.180694 128.565692 \n", + "28 3.610918 1.0 3.610918 13.038728 47.081777 170.008432 \n", + "29 4.976734 2.0 9.454071 44.814424 213.018272 1015.311782 \n", + "... ... ... ... ... ... ... \n", + "994 6.689599 22.0 80.418326 342.517685 1597.834083 7929.112435 \n", + "995 6.045005 4.0 19.648868 111.067428 656.498970 3929.637204 \n", + "996 4.859812 3.0 9.569343 37.863454 163.185710 729.264530 \n", + "997 3.332205 1.0 3.332205 11.103587 36.999422 123.289642 \n", + "998 5.123964 2.0 8.787526 39.676691 183.700876 869.466985 \n", + "999 3.135494 1.0 3.135494 9.831324 30.826059 96.654931 \n", + "1000 9.431322 440.0 1541.199742 6864.103775 34431.390925 189571.354539 \n", + "1001 3.583519 2.0 5.662960 17.165685 55.009811 183.604539 \n", + "1002 6.265301 1.0 6.265301 39.253999 245.938129 1540.876460 \n", + "1003 5.247024 1.0 5.247024 27.531262 144.457192 757.970366 \n", + "1004 8.805225 12.0 51.071233 279.236525 1743.367154 12017.873604 \n", + "1005 3.806662 1.0 3.806662 14.490679 55.161125 209.979787 \n", + "1006 6.295266 12.0 48.543764 228.344849 1169.963335 6287.631418 \n", + "1007 5.370638 1.0 5.370638 28.843753 154.909356 831.962077 \n", + "1008 6.109248 45.0 127.018209 459.199656 1821.140937 7705.150344 \n", + "1009 3.951244 2.0 6.436150 21.787088 77.031814 281.872426 \n", + "1010 5.187386 2.0 7.672292 33.083733 154.930922 762.220421 \n", + "1011 3.091042 1.0 3.091042 9.554543 29.533499 91.289301 \n", + "1012 4.983607 12.0 32.615067 106.962952 399.085420 1608.995005 \n", + "1013 5.398163 3.0 7.189922 30.827563 158.962321 850.836518 \n", + "1014 8.348775 659.0 2209.913454 9248.880591 43555.882096 222371.087542 \n", + "1015 6.984716 8.0 33.299343 158.796546 848.436567 4912.187478 \n", + "1016 4.127134 2.0 6.429719 22.335136 82.506535 318.241328 \n", + "1017 3.555348 1.0 3.555348 12.640500 44.941377 159.782236 \n", + "1018 6.028279 1.0 6.028279 36.340142 219.068497 1320.605915 \n", + "1019 7.059618 7.0 45.332490 294.732410 1923.808607 12607.044189 \n", + "1020 2.772589 2.0 5.337538 14.266213 38.188290 102.376568 \n", + "1021 7.198931 39.0 129.906500 595.110290 3138.035261 17972.964491 \n", + "1022 2.995732 1.0 2.995732 8.974412 26.884935 80.540068 \n", + "1023 5.283204 5.0 18.003488 73.970915 332.114026 1572.094310 \n", + "\n", + " m5 m6 m7 m8 \n", + "0 3.325478e+04 1.988641e+05 1.226715e+06 7.717880e+06 \n", + "1 4.056676e+03 2.137000e+04 1.125741e+05 5.930244e+05 \n", + "2 5.335052e+03 2.968701e+04 1.651940e+05 9.192252e+05 \n", + "3 7.760049e+02 2.936550e+03 1.111246e+04 4.205166e+04 \n", + "4 4.850632e+03 2.648244e+04 1.445832e+05 7.893641e+05 \n", + "5 7.976885e+03 4.810608e+04 2.901126e+05 1.749578e+06 \n", + "6 1.839678e+05 1.302402e+06 9.492862e+06 7.087854e+07 \n", + "7 4.171502e+03 2.025181e+04 1.013195e+05 5.148902e+05 \n", + "8 1.197410e+03 4.941874e+03 2.039578e+04 8.417612e+04 \n", + "9 1.600027e-01 1.109054e-01 7.687378e-02 5.328484e-02 \n", + "10 5.101748e+04 3.091354e+05 1.907096e+06 1.191112e+07 \n", + "11 1.707021e+04 1.046042e+05 6.415184e+05 3.937453e+06 \n", + "12 7.485986e+03 3.773624e+04 1.926923e+05 9.947230e+05 \n", + "13 1.329461e+04 6.875724e+04 3.588217e+05 1.884294e+06 \n", + "14 9.712563e+03 6.092571e+04 3.821795e+05 2.397365e+06 \n", + "15 1.169018e+04 6.710735e+04 3.867371e+05 2.237002e+06 \n", + "16 4.538466e+04 3.295322e+05 2.407739e+06 1.766220e+07 \n", + "17 7.174515e+04 5.133805e+05 3.712545e+06 2.704703e+07 \n", + "18 5.131803e+03 2.833505e+04 1.564509e+05 8.638374e+05 \n", + "19 1.615753e+03 7.080274e+03 3.102595e+04 1.359565e+05 \n", + "20 1.317159e+04 8.781442e+04 5.854550e+05 3.903203e+06 \n", + "21 5.734448e+03 3.237352e+04 1.827630e+05 1.031779e+06 \n", + "22 5.018497e+04 4.372083e+05 3.808932e+06 3.318317e+07 \n", + "23 8.044423e+02 3.049848e+03 1.159256e+04 4.410515e+04 \n", + "24 1.000017e+03 3.465794e+03 1.201153e+04 4.162878e+04 \n", + "25 2.704686e+03 1.264015e+04 5.988422e+04 2.856838e+05 \n", + "26 2.366367e+05 1.497962e+06 9.907004e+06 6.793387e+07 \n", + "27 4.329187e+02 1.457765e+03 4.908727e+03 1.652914e+04 \n", + "28 6.138865e+02 2.216694e+03 8.004299e+03 2.890287e+04 \n", + "29 4.852247e+03 2.324979e+04 1.116849e+05 5.378129e+05 \n", + "... ... ... ... ... \n", + "994 4.128033e+04 2.237338e+05 1.255615e+06 7.263910e+06 \n", + "995 2.360291e+04 1.419026e+05 8.533656e+05 5.132418e+06 \n", + "996 3.326287e+03 1.539243e+04 7.202931e+04 3.400451e+05 \n", + "997 4.108263e+02 1.368957e+03 4.561646e+03 1.520034e+04 \n", + "998 4.192038e+03 2.051605e+04 1.015925e+05 5.076205e+05 \n", + "999 3.030610e+02 9.502459e+02 2.979491e+03 9.342176e+03 \n", + "1000 1.133382e+06 7.309490e+06 5.048145e+07 3.699213e+08 \n", + "1001 6.298276e+02 2.198520e+03 7.756832e+03 2.754389e+04 \n", + "1002 9.654055e+03 6.048556e+04 3.789603e+05 2.374300e+06 \n", + "1003 3.977089e+03 2.086788e+04 1.094943e+05 5.745191e+05 \n", + "1004 8.883793e+04 6.890841e+05 5.526233e+06 4.537615e+07 \n", + "1005 7.993222e+02 3.042750e+03 1.158272e+04 4.409151e+04 \n", + "1006 3.476625e+04 1.958443e+05 1.118023e+06 6.448414e+06 \n", + "1007 4.468167e+03 2.399691e+04 1.288787e+05 6.921609e+05 \n", + "1008 3.435583e+04 1.604540e+05 7.817490e+05 3.958450e+06 \n", + "1009 1.057839e+03 4.040852e+03 1.562117e+04 6.086522e+04 \n", + "1010 3.850892e+03 1.972002e+04 1.016591e+05 5.257640e+05 \n", + "1011 2.821791e+02 8.722276e+02 2.696093e+03 8.333736e+03 \n", + "1012 6.796709e+03 2.960535e+04 1.318929e+05 5.982321e+05 \n", + "1013 4.585605e+03 2.474621e+04 1.335760e+05 7.210561e+05 \n", + "1014 1.205020e+06 6.842703e+06 4.039094e+07 2.465605e+08 \n", + "1015 2.994611e+04 1.885966e+05 1.213369e+06 7.924473e+06 \n", + "1016 1.262136e+03 5.090911e+03 2.073895e+04 8.496629e+04 \n", + "1017 5.680815e+02 2.019727e+03 7.180834e+03 2.553036e+04 \n", + "1018 7.960980e+03 4.799101e+04 2.893032e+05 1.744000e+06 \n", + "1019 8.294237e+04 5.478200e+05 3.632255e+06 2.417480e+07 \n", + "1020 2.748609e+02 7.390245e+02 1.989885e+03 5.365475e+03 \n", + "1021 1.084415e+05 6.770740e+05 4.328078e+06 2.814110e+07 \n", + "1022 2.412765e+02 7.227997e+02 2.165315e+03 6.486703e+03 \n", + "1023 7.672361e+03 3.813046e+04 1.916971e+05 9.712955e+05 \n", + "\n", + "[1024 rows x 14 columns]" + ] + }, + "execution_count": 151, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wiki_cube = wiki_data.groupby(attributes).agg({metric: [\n", + " 'min',\n", + " 'max',\n", + " moment(0),\n", + " moment(1),\n", + " moment(2),\n", + " moment(3),\n", + " moment(4),\n", + " moment(5),\n", + " moment(6),\n", + " moment(7),\n", + " moment(8)\n", + "]}).reset_index(col_level=1)\n", + "wiki_cube.columns = wiki_cube.columns.get_level_values(1)\n", + "wiki_cube" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "wiki_cube.to_csv('lib/src/test/resources/wiki_moments_cubed.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
channelcountryNamenamespacecountoutliers1outliers5outliers10
0#ar.wikipediaAlgeriaMain180.00.02.0
1#ar.wikipediaAlgeriaنقاش10.00.00.0
2#ar.wikipediaAustraliaMain10.00.00.0
3#ar.wikipediaBahrainMain10.00.00.0
4#ar.wikipediaBelgiumMain10.00.00.0
5#ar.wikipediaDenmarkويكيبيديا10.00.01.0
6#ar.wikipediaEgyptMain281.04.08.0
7#ar.wikipediaHashemite Kingdom of JordanMain80.00.00.0
8#ar.wikipediaHashemite Kingdom of Jordanنقاش المستخدم10.00.00.0
9#ar.wikipediaIranMain10.00.00.0
10#ar.wikipediaIraqMain190.00.04.0
11#ar.wikipediaIraqنقاش20.00.01.0
12#ar.wikipediaIsraelMain40.00.00.0
13#ar.wikipediaKuwaitMain90.00.00.0
14#ar.wikipediaKuwaitنقاش المستخدم10.00.01.0
15#ar.wikipediaLebanonMain20.00.00.0
16#ar.wikipediaLibyaMain30.02.02.0
17#ar.wikipediaMoroccoMain130.02.04.0
18#ar.wikipediaMoroccoبوابة10.00.00.0
19#ar.wikipediaMoroccoقالب10.00.00.0
20#ar.wikipediaMoroccoنقاش10.00.01.0
21#ar.wikipediaMoroccoنقاش المستخدم10.00.00.0
22#ar.wikipediaMoroccoويكيبيديا11.01.01.0
23#ar.wikipediaOmanMain20.00.00.0
24#ar.wikipediaPalestineMain40.00.00.0
25#ar.wikipediaQatarMain30.00.00.0
26#ar.wikipediaSaudi ArabiaMain970.03.08.0
27#ar.wikipediaSaudi Arabiaمستخدم10.00.00.0
28#ar.wikipediaSaudi Arabiaويكيبيديا10.00.00.0
29#ar.wikipediaSudanMain20.00.00.0
........................
994#zh.wikipediaChinaMain220.00.01.0
995#zh.wikipediaChinaWikipedia40.00.02.0
996#zh.wikipediaCzech RepublicMain30.00.00.0
997#zh.wikipediaFinlandMain10.00.00.0
998#zh.wikipediaFranceMain20.00.00.0
999#zh.wikipediaGermanyMain10.00.00.0
1000#zh.wikipediaHong KongMain4402.014.022.0
1001#zh.wikipediaHong KongTemplate20.00.00.0
1002#zh.wikipediaIsraelMain10.00.01.0
1003#zh.wikipediaItalyMain10.00.00.0
1004#zh.wikipediaJapanMain121.02.03.0
1005#zh.wikipediaJapanTalk10.00.00.0
1006#zh.wikipediaMacaoMain120.00.01.0
1007#zh.wikipediaMacaoWikipedia10.00.00.0
1008#zh.wikipediaMalaysiaMain450.00.01.0
1009#zh.wikipediaNew ZealandMain20.00.00.0
1010#zh.wikipediaPortugalMain20.00.00.0
1011#zh.wikipediaRepublic of KoreaMain10.00.00.0
1012#zh.wikipediaSingaporeMain120.00.00.0
1013#zh.wikipediaTaiwanFile30.00.00.0
1014#zh.wikipediaTaiwanMain6590.06.039.0
1015#zh.wikipediaTaiwanTalk80.01.01.0
1016#zh.wikipediaTaiwanTemplate20.00.00.0
1017#zh.wikipediaTaiwanUser10.00.00.0
1018#zh.wikipediaTaiwanUser talk10.00.01.0
1019#zh.wikipediaTaiwanWikipedia70.02.06.0
1020#zh.wikipediaUnited KingdomMain20.00.00.0
1021#zh.wikipediaUnited StatesMain390.01.05.0
1022#zh.wikipediaUnited StatesTemplate10.00.00.0
1023#zh.wikipediaVietnamMain50.00.00.0
\n", + "

1024 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " channel countryName namespace count \\\n", + "0 #ar.wikipedia Algeria Main 18 \n", + "1 #ar.wikipedia Algeria نقاش 1 \n", + "2 #ar.wikipedia Australia Main 1 \n", + "3 #ar.wikipedia Bahrain Main 1 \n", + "4 #ar.wikipedia Belgium Main 1 \n", + "5 #ar.wikipedia Denmark ويكيبيديا 1 \n", + "6 #ar.wikipedia Egypt Main 28 \n", + "7 #ar.wikipedia Hashemite Kingdom of Jordan Main 8 \n", + "8 #ar.wikipedia Hashemite Kingdom of Jordan نقاش المستخدم 1 \n", + "9 #ar.wikipedia Iran Main 1 \n", + "10 #ar.wikipedia Iraq Main 19 \n", + "11 #ar.wikipedia Iraq نقاش 2 \n", + "12 #ar.wikipedia Israel Main 4 \n", + "13 #ar.wikipedia Kuwait Main 9 \n", + "14 #ar.wikipedia Kuwait نقاش المستخدم 1 \n", + "15 #ar.wikipedia Lebanon Main 2 \n", + "16 #ar.wikipedia Libya Main 3 \n", + "17 #ar.wikipedia Morocco Main 13 \n", + "18 #ar.wikipedia Morocco بوابة 1 \n", + "19 #ar.wikipedia Morocco قالب 1 \n", + "20 #ar.wikipedia Morocco نقاش 1 \n", + "21 #ar.wikipedia Morocco نقاش المستخدم 1 \n", + "22 #ar.wikipedia Morocco ويكيبيديا 1 \n", + "23 #ar.wikipedia Oman Main 2 \n", + "24 #ar.wikipedia Palestine Main 4 \n", + "25 #ar.wikipedia Qatar Main 3 \n", + "26 #ar.wikipedia Saudi Arabia Main 97 \n", + "27 #ar.wikipedia Saudi Arabia مستخدم 1 \n", + "28 #ar.wikipedia Saudi Arabia ويكيبيديا 1 \n", + "29 #ar.wikipedia Sudan Main 2 \n", + "... ... ... ... ... \n", + "994 #zh.wikipedia China Main 22 \n", + "995 #zh.wikipedia China Wikipedia 4 \n", + "996 #zh.wikipedia Czech Republic Main 3 \n", + "997 #zh.wikipedia Finland Main 1 \n", + "998 #zh.wikipedia France Main 2 \n", + "999 #zh.wikipedia Germany Main 1 \n", + "1000 #zh.wikipedia Hong Kong Main 440 \n", + "1001 #zh.wikipedia Hong Kong Template 2 \n", + "1002 #zh.wikipedia Israel Main 1 \n", + "1003 #zh.wikipedia Italy Main 1 \n", + "1004 #zh.wikipedia Japan Main 12 \n", + "1005 #zh.wikipedia Japan Talk 1 \n", + "1006 #zh.wikipedia Macao Main 12 \n", + "1007 #zh.wikipedia Macao Wikipedia 1 \n", + "1008 #zh.wikipedia Malaysia Main 45 \n", + "1009 #zh.wikipedia New Zealand Main 2 \n", + "1010 #zh.wikipedia Portugal Main 2 \n", + "1011 #zh.wikipedia Republic of Korea Main 1 \n", + "1012 #zh.wikipedia Singapore Main 12 \n", + "1013 #zh.wikipedia Taiwan File 3 \n", + "1014 #zh.wikipedia Taiwan Main 659 \n", + "1015 #zh.wikipedia Taiwan Talk 8 \n", + "1016 #zh.wikipedia Taiwan Template 2 \n", + "1017 #zh.wikipedia Taiwan User 1 \n", + "1018 #zh.wikipedia Taiwan User talk 1 \n", + "1019 #zh.wikipedia Taiwan Wikipedia 7 \n", + "1020 #zh.wikipedia United Kingdom Main 2 \n", + "1021 #zh.wikipedia United States Main 39 \n", + "1022 #zh.wikipedia United States Template 1 \n", + "1023 #zh.wikipedia Vietnam Main 5 \n", + "\n", + " outliers1 outliers5 outliers10 \n", + "0 0.0 0.0 2.0 \n", + "1 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 1.0 \n", + "6 1.0 4.0 8.0 \n", + "7 0.0 0.0 0.0 \n", + "8 0.0 0.0 0.0 \n", + "9 0.0 0.0 0.0 \n", + "10 0.0 0.0 4.0 \n", + "11 0.0 0.0 1.0 \n", + "12 0.0 0.0 0.0 \n", + "13 0.0 0.0 0.0 \n", + "14 0.0 0.0 1.0 \n", + "15 0.0 0.0 0.0 \n", + "16 0.0 2.0 2.0 \n", + "17 0.0 2.0 4.0 \n", + "18 0.0 0.0 0.0 \n", + "19 0.0 0.0 0.0 \n", + "20 0.0 0.0 1.0 \n", + "21 0.0 0.0 0.0 \n", + "22 1.0 1.0 1.0 \n", + "23 0.0 0.0 0.0 \n", + "24 0.0 0.0 0.0 \n", + "25 0.0 0.0 0.0 \n", + "26 0.0 3.0 8.0 \n", + "27 0.0 0.0 0.0 \n", + "28 0.0 0.0 0.0 \n", + "29 0.0 0.0 0.0 \n", + "... ... ... ... \n", + "994 0.0 0.0 1.0 \n", + "995 0.0 0.0 2.0 \n", + "996 0.0 0.0 0.0 \n", + "997 0.0 0.0 0.0 \n", + "998 0.0 0.0 0.0 \n", + "999 0.0 0.0 0.0 \n", + "1000 2.0 14.0 22.0 \n", + "1001 0.0 0.0 0.0 \n", + "1002 0.0 0.0 1.0 \n", + "1003 0.0 0.0 0.0 \n", + "1004 1.0 2.0 3.0 \n", + "1005 0.0 0.0 0.0 \n", + "1006 0.0 0.0 1.0 \n", + "1007 0.0 0.0 0.0 \n", + "1008 0.0 0.0 1.0 \n", + "1009 0.0 0.0 0.0 \n", + "1010 0.0 0.0 0.0 \n", + "1011 0.0 0.0 0.0 \n", + "1012 0.0 0.0 0.0 \n", + "1013 0.0 0.0 0.0 \n", + "1014 0.0 6.0 39.0 \n", + "1015 0.0 1.0 1.0 \n", + "1016 0.0 0.0 0.0 \n", + "1017 0.0 0.0 0.0 \n", + "1018 0.0 0.0 1.0 \n", + "1019 0.0 2.0 6.0 \n", + "1020 0.0 0.0 0.0 \n", + "1021 0.0 1.0 5.0 \n", + "1022 0.0 0.0 0.0 \n", + "1023 0.0 0.0 0.0 \n", + "\n", + "[1024 rows x 7 columns]" + ] + }, + "execution_count": 153, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t1 = wiki_data[metric].quantile(0.99)\n", + "t5 = wiki_data[metric].quantile(0.95)\n", + "t10 = wiki_data[metric].quantile(0.90)\n", + "wiki_oracle = wiki_data.groupby(attributes).agg({metric: [\n", + " 'count',\n", + " outliers(t1, \"1\"),\n", + " outliers(t5, \"5\"),\n", + " outliers(t10, \"10\")\n", + "]}).reset_index(col_level=1)\n", + "wiki_oracle.columns = wiki_oracle.columns.get_level_values(1)\n", + "wiki_oracle" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "wiki_oracle.to_csv('lib/src/test/resources/wiki_oracle_cubed.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/lib/momentBench.json b/lib/momentBenchMilan.json similarity index 79% rename from lib/momentBench.json rename to lib/momentBenchMilan.json index 0c73062de..1d941ff1f 100644 --- a/lib/momentBench.json +++ b/lib/momentBenchMilan.json @@ -1,5 +1,5 @@ { - "minSupport": 0.005, + "minSupport": 0.006, "percentile": 1.0, "outlierColumn": "outliers1", "numWarmupTrials": 10, @@ -9,5 +9,7 @@ "attributes": [ "Grid", "Country" - ] + ], + "doContainment": true, + "verbose": true } \ No newline at end of file diff --git a/lib/momentBenchWiki.json b/lib/momentBenchWiki.json new file mode 100644 index 000000000..326cc09c2 --- /dev/null +++ b/lib/momentBenchWiki.json @@ -0,0 +1,16 @@ +{ + "minSupport": 0.1, + "percentile": 1.0, + "outlierColumn": "outliers1", + "numWarmupTrials": 10, + "numTrials": 10, + "oracleCubeFilename": "src/test/resources/wiki_oracle_cubed.csv", + "momentCubeFilename": "src/test/resources/wiki_moments_cubed.csv", + "attributes": [ + "channel", + "countryName", + "namespace" + ], + "doContainment": true, + "verbose": false +} \ No newline at end of file diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/APLMomentSummarizerBench.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/APLMomentSummarizerBench.java index 95ea31e5e..2707c7f62 100644 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/APLMomentSummarizerBench.java +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/APLMomentSummarizerBench.java @@ -22,6 +22,7 @@ public class APLMomentSummarizerBench { String momentCubeFilename; boolean doContainment; List attributes; + boolean verbose; public APLMomentSummarizerBench(String confFile) throws IOException { RunConfig conf = RunConfig.fromJsonFile(confFile); @@ -35,6 +36,7 @@ public APLMomentSummarizerBench(String confFile) throws IOException { momentCubeFilename = conf.get("momentCubeFilename"); doContainment = conf.get("doContainment", false); attributes = conf.get("attributes"); + verbose = conf.get("verbose", false); } public static void main(String[] args) throws Exception { @@ -52,8 +54,6 @@ public void run() throws Exception { public void testOracleOrder3() throws Exception { List requiredColumns = new ArrayList<>(attributes); -// requiredColumns.add("Grid"); -// requiredColumns.add("Country"); Map colTypes = new HashMap<>(); colTypes.put("count", Schema.ColType.DOUBLE); colTypes.put(outlierColumn, Schema.ColType.DOUBLE); @@ -69,6 +69,8 @@ public void testOracleOrder3() throws Exception { summ.setMinSupport(minSupport); summ.setMinRatioMetric(10.0); summ.setAttributes(attributes); + summ.setDoContainment(doContainment); + summ.onlyUseSupport(true); for (int i = 0; i < numWarmupTrials; i++) { summ.process(df); } @@ -80,13 +82,13 @@ public void testOracleOrder3() throws Exception { System.out.format("Oracle time: %g\n", timeElapsed / (1.e9 * numTrials)); APLExplanation e = summ.getResults(); System.out.format("Num results: %d\n\n", e.getResults().size()); -// System.out.println(e.prettyPrint()); + if (verbose) { + System.out.println(e.prettyPrint()); + } } public void testCubeOrder3(boolean useCascade) throws Exception { List requiredColumns = new ArrayList<>(attributes); -// requiredColumns.add("Grid"); -// requiredColumns.add("Country"); Map colTypes = new HashMap<>(); List momentColumns = new ArrayList<>(); for (int i = 0; i <= numMoments; i++) { @@ -140,6 +142,8 @@ public void testCubeOrder3(boolean useCascade) throws Exception { } APLExplanation e = summ.getResults(); System.out.format("Num results: %d\n\n", e.getResults().size()); -// System.out.println(e.prettyPrint()); + if (verbose) { + System.out.println(e.prettyPrint()); + } } } \ No newline at end of file diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLOutlierSummarizer.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLOutlierSummarizer.java index 37d9c2a6d..1e0bab036 100644 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLOutlierSummarizer.java +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLOutlierSummarizer.java @@ -9,6 +9,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; /** @@ -18,6 +19,7 @@ public class APLOutlierSummarizer extends APLSummarizer { private Logger log = LoggerFactory.getLogger("APLOutlierSummarizer"); private String countColumn = null; + private boolean onlyUseSupport = false; @Override public List getAggregateNames() { @@ -42,15 +44,21 @@ public List getQualityMetricList() { qualityMetricList.add( new SupportMetric(0) ); - qualityMetricList.add( - new GlobalRatioMetric(0, 1) - ); + if (!onlyUseSupport) { + qualityMetricList.add( + new GlobalRatioMetric(0, 1) + ); + } return qualityMetricList; } @Override public List getThresholds() { - return Arrays.asList(minOutlierSupport, minRatioMetric); + if (onlyUseSupport) { + return Collections.singletonList(minOutlierSupport); + } else { + return Arrays.asList(minOutlierSupport, minRatioMetric); + } } @Override @@ -72,4 +80,5 @@ public void setCountColumn(String countColumn) { public double getMinRatioMetric() { return minRatioMetric; } + public void onlyUseSupport(boolean onlyUseSupport) { this.onlyUseSupport = onlyUseSupport; } } diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedSupportMetric.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedSupportMetric.java index bd560d57f..8fc599027 100644 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedSupportMetric.java +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedSupportMetric.java @@ -72,18 +72,18 @@ public Action getAction(double[] aggregates, double threshold) { private Action getActionCascade(double[] aggregates, double threshold) { numEnterCascade++; + double outlierRateNeeded = threshold * globalCount / aggregates[momentsBaseIdx]; // Simple checks on min and max if (aggregates[maxIdx] < cutoff) { return Action.PRUNE; } - if (aggregates[minIdx] >= cutoff) { + if (aggregates[minIdx] >= cutoff && outlierRateNeeded <= 1.0) { return Action.KEEP; } numAfterNaiveCheck++; // Markov bounds - double outlierRateNeeded = threshold * globalCount / aggregates[momentsBaseIdx]; double mean = aggregates[momentsBaseIdx+1] / aggregates[momentsBaseIdx]; double min = aggregates[minIdx]; double max = aggregates[maxIdx]; From 9a6f3c59ffb53b399b0c7be92b3e693782709f93 Mon Sep 17 00:00:00 2001 From: Jialin Ding Date: Sun, 18 Feb 2018 17:13:00 -0800 Subject: [PATCH 3/7] Moved some things to msketch repo --- Moments Cube Creation.ipynb | 7651 ++++++++++------- lib/cp.txt | 1 - lib/genCP.sh | 2 - lib/momentBench.sh | 2 - lib/momentBenchMilan.json | 15 - lib/momentBenchWiki.json | 16 - .../macrobase/APLMomentSummarizerBench.java | 149 - .../futuredata/macrobase/RunConfig.java | 50 - 8 files changed, 4416 insertions(+), 3470 deletions(-) delete mode 100644 lib/cp.txt delete mode 100644 lib/genCP.sh delete mode 100644 lib/momentBench.sh delete mode 100644 lib/momentBenchMilan.json delete mode 100644 lib/momentBenchWiki.json delete mode 100644 lib/src/main/java/edu/stanford/futuredata/macrobase/APLMomentSummarizerBench.java delete mode 100644 lib/src/main/java/edu/stanford/futuredata/macrobase/RunConfig.java diff --git a/Moments Cube Creation.ipynb b/Moments Cube Creation.ipynb index f48cfc2ec..22fec53e0 100644 --- a/Moments Cube Creation.ipynb +++ b/Moments Cube Creation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 51, + "execution_count": 2, "metadata": { "collapsed": true }, @@ -16,37 +16,99 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ - "milan_data = pd.read_csv('~/Downloads/sms-call-internet-mi-2013-11-01 2.txt', sep='\\t',\n", - " header=None, names = [\"Grid\", \"Time\", \"Country\", \"SMSin\", \"SMSout\",\n", - " \"Callin\", \"Callout\", \"Internet\"])" + "def moment(n):\n", + " def moment_(x):\n", + " return np.sum(np.power(x, n))\n", + " moment_.__name__ = 'm%s' % n\n", + " return moment_\n", + "\n", + "def log_moment(n):\n", + " def log_moment_(x):\n", + " return np.sum(np.power(np.log(x), n))\n", + " log_moment_.__name__ = 'lm%s' % n\n", + " return log_moment_\n", + "\n", + "def log_min():\n", + " def log_min_(x):\n", + " return np.min(np.log(x))\n", + " log_min_.__name__ = 'lmin'\n", + " return log_min_\n", + "\n", + "def log_max():\n", + " def log_max_(x):\n", + " return np.max(np.log(x))\n", + " log_max_.__name__ = 'lmax'\n", + " return log_max_\n", + "\n", + "def outliers(t, name):\n", + " def outliers_(x):\n", + " return np.sum(x >= t)\n", + " outliers_.__name__ = 'outliers%s' % name\n", + " return outliers_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Milan" + ] + }, + { + "cell_type": "code", + "execution_count": 235, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# column_names = [\"Grid\", \"Time\", \"Country\", \"SMSin\", \"SMSout\", \"Callin\", \"Callout\", \"Internet\"]\n", + "# milan_data = pd.DataFrame(columns=column_names)\n", + "# for i in range(1, 6):\n", + "# milan_data_day = pd.read_csv('~/Downloads/sms-call-internet-mi-2013-11-0' + str(i) + '.txt',\n", + "# sep='\\t', header=None,\n", + "# names=column_names)\n", + "# milan_data = milan_data.append(milan_data_day)\n", + "# milan_data.to_csv('~/msketch/javamsketch/src/test/resources/milan_5day.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "milan_data = pd.read_csv('~/msketch/javamsketch/src/test/resources/milan_5day.csv')" ] }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 239, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 116, + "execution_count": 239, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAD9CAYAAABA8iukAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEyFJREFUeJzt3WGMpdV93/Hvz1AcksYGEndKd1FByjYV9qqtPQIqV9U0\npLBA1CWVbWGhsLjUW8ngptVKzbp9QWViiVRJqakc1I3ZGqI0hNJErMI62w32KOoLMDi2vF6IzQiv\nw46wSbwYd2vF1pp/X9yz68t6Zufs3Jm9d2a+H+lqn+c857n33KO785tznnOfSVUhSVKPN427AZKk\ntcPQkCR1MzQkSd0MDUlSN0NDktTN0JAkdVsyNJLsTfJKki8PlV2S5GCSF9q/F7fyJLk/yVySLyV5\n59A5O1r9F5LsGCp/V5JD7Zz7k+RMryFJGp+ekcangG2nle0GnqyqLcCTbR/gBmBLe+wEHoBBAAB3\nA1cDVwF3D4XAA8AHh87btsRrSJLGZMnQqKo/AY6dVrwdeKhtPwTcPFT+cA08BVyU5FLgeuBgVR2r\nqleBg8C2duwtVfVUDb5l+PBpz7XQa0iSxmS51zSmqurltv0NYKptbwJeGqp3tJWdqfzoAuVneg1J\n0picP+oTVFUlWdV7kSz1Gkl2MpgO48ILL3zXZZddtprNGbvXX3+dN73JNQwLsW8WZ98szr6Br371\nq39ZVW9bqt5yQ+ObSS6tqpfbFNMrrXweGP6JvbmVzQMzp5XPtvLNC9Q/02v8iKraA+wBmJ6ermef\nfXaZb2ttmJ2dZWZmZtzNmEj2zeLsm8XZN5Dk6z31lhut+4CTK6B2AI8Pld/WVlFdA7zWppgOANcl\nubhdAL8OONCOfSfJNW3V1G2nPddCryFJGpMlRxpJfpfBKOGnkxxlsArqXuDRJHcAXwfe16rvB24E\n5oDvAh8AqKpjSe4Bnmn1PlpVJy+uf4jBCq0LgU+3B2d4DUnSmCwZGlX1/kUOXbtA3QLuXOR59gJ7\nFyh/FnjHAuXfWug1JEnjs7Gv/EiSzoqhIUnqZmhIkroZGpKkboaGJKmboSFJ6jbybUQk6Vy6fPcT\np7aP3HvTGFuyMRkakibecFBovJyekiR1MzQkSd0MDUlSN0NDktTN0JAkdTM0JEndXHIraSK5zHYy\nOdKQJHUzNCRJ3ZyekjQxznZKyluKnHuONCRJ3QwNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN0NDktTN\n0JAkdTM0JEnd/Ea4pLHyxoRriyMNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN0NDktTNJbeS1oXTl+76\nR5lWx0gjjST/NsnhJF9O8rtJfizJFUmeTjKX5PeSXNDqvrntz7Xjlw89z0da+VeSXD9Uvq2VzSXZ\nPUpbJUmjW3ZoJNkE/GtguqreAZwH3AL8GnBfVf0M8CpwRzvlDuDVVn5fq0eSK9t5bwe2Ab+Z5Lwk\n5wGfAG4ArgTe3+pKksZk1Gsa5wMXJjkf+HHgZeDngMfa8YeAm9v29rZPO35tkrTyR6rqe1X1NWAO\nuKo95qrqxar6PvBIqytJGpNlh0ZVzQO/Dvw5g7B4Dfg88O2qOtGqHQU2te1NwEvt3BOt/k8Nl592\nzmLlkqQxWfaF8CQXM/jN/wrg28D/ZDC9dM4l2QnsBJiammJ2dnYczThnjh8/vu7f43LZN4ub1L7Z\ntfXE0pWW4Wze66T2zSQaZfXUzwNfq6q/AEjy+8C7gYuSnN9GE5uB+VZ/HrgMONqms94KfGuo/KTh\ncxYrf4Oq2gPsAZienq6ZmZkR3tbkm52dZb2/x+WybxY3qX1z+yrdsPDIrTPddSe1bybRKNc0/hy4\nJsmPt2sT1wLPAZ8F3tPq7AAeb9v72j7t+Geqqlr5LW111RXAFuBzwDPAlrYa6wIGF8v3jdBeSdKI\nlj3SqKqnkzwG/ClwAvgCg9/2nwAeSfKrrezBdsqDwG8nmQOOMQgBqupwkkcZBM4J4M6q+gFAkruA\nAwxWZu2tqsPLba8kaXQjfbmvqu4G7j6t+EUGK59Or/tXwHsXeZ6PAR9boHw/sH+UNkqaPP4NjbXL\n24hIkroZGpKkboaGJKmboSFJ6mZoSJK6GRqSpG6GhiSpm6EhSepmaEiSuhkakqRuhoYkqZuhIUnq\nZmhIkrqNdJdbSerlnW3XB0cakqRuhoYkqZuhIUnqZmhIkrp5IVzSujR84f3IvTeNsSXriyMNSVI3\nQ0OS1M3QkCR1MzQkSd0MDUlSN0NDktTN0JAkdTM0JEnd/HKfpFXjnW3XH0cakqRuhoYkqZuhIUnq\nZmhIkroZGpKkboaGJKnbSKGR5KIkjyX5syTPJ/mHSS5JcjDJC+3fi1vdJLk/yVySLyV559Dz7Gj1\nX0iyY6j8XUkOtXPuT5JR2itJGs2oI42PA39UVX8X+HvA88Bu4Mmq2gI82fYBbgC2tMdO4AGAJJcA\ndwNXA1cBd58Mmlbng0PnbRuxvdLYXL77iVMPaa1admgkeSvwj4EHAarq+1X1bWA78FCr9hBwc9ve\nDjxcA08BFyW5FLgeOFhVx6rqVeAgsK0de0tVPVVVBTw89FySpDEYZaRxBfAXwH9P8oUkn0zyE8BU\nVb3c6nwDmGrbm4CXhs4/2srOVH50gXJJ0piMchuR84F3Ah+uqqeTfJwfTkUBUFWVpEZpYI8kOxlM\neTE1NcXs7Oxqv+RYHT9+fN2/x+WahL45NP/aqe2tm956anvX1hOntsfRxnH0zfB7Hqel3vckfG7W\nilFC4yhwtKqebvuPMQiNbya5tKpeblNMr7Tj88BlQ+dvbmXzwMxp5bOtfPMC9X9EVe0B9gBMT0/X\nzMzMQtXWjdnZWdb7e1yuSeib24euWRy5dWbJ8nNlHH1z+4Rcv1mqvyfhc7NWLHt6qqq+AbyU5Gdb\n0bXAc8A+4OQKqB3A4217H3BbW0V1DfBam8Y6AFyX5OJ2Afw64EA79p0k17RVU7cNPZckaQxGvcvt\nh4HfSXIB8CLwAQZB9GiSO4CvA+9rdfcDNwJzwHdbXarqWJJ7gGdavY9W1bG2/SHgU8CFwKfbQ5I0\nJiOFRlV9EZhe4NC1C9Qt4M5FnmcvsHeB8meBd4zSRknSyvEb4ZKkboaGJKmboSFJ6uafe5W0orxN\nyvpmaEha94aD7Mi9N42xJWuf01OSpG6GhiSpm9NT0gpwHl8bhSMNSVI3Q0OS1M3QkCR1MzQkSd0M\nDUlSN0NDktTN0JAkdTM0JEnd/HKftIr80p/WG0cakqRujjQkjcwR1cbhSEOS1M3QkCR1MzQkSd0M\nDUlSNy+ES8vkxV9tRIaGdBYMCm10hoakDWU4+I/ce9MYW7I2eU1DktTN0JAkdTM0JEndDA1JUjdD\nQ5LUzdVTkpbF5ccbkyMNSVI3RxrSGPhdAa1VjjQkSd1GDo0k5yX5QpI/bPtXJHk6yVyS30tyQSt/\nc9ufa8cvH3qOj7TyryS5fqh8WyubS7J71LZKkkazEiONXwaeH9r/NeC+qvoZ4FXgjlZ+B/BqK7+v\n1SPJlcAtwNuBbcBvtiA6D/gEcANwJfD+VldaVy7f/cSphzTpRgqNJJuBm4BPtv0APwc81qo8BNzc\ntre3fdrxa1v97cAjVfW9qvoaMAdc1R5zVfViVX0feKTVldYtA0STbtSRxn8B/h3wetv/KeDbVXWi\n7R8FNrXtTcBLAO34a63+qfLTzlmsXJI0JstePZXkF4BXqurzSWZWrknLastOYCfA1NQUs7Oz42zO\nqjt+/Pi6f4/Ltdp9s2vriaUrrZCVfh8r3Tfnsi9Wy8n+8P9Uv1GW3L4b+GdJbgR+DHgL8HHgoiTn\nt9HEZmC+1Z8HLgOOJjkfeCvwraHyk4bPWaz8DapqD7AHYHp6umZmZkZ4W5NvdnaW9f4el2u1++b2\nczhtdOTWmRV9vpXum3PZF6vm0P8DYNfWH/DhX5gZb1vWiGVPT1XVR6pqc1VdzuBC9meq6lbgs8B7\nWrUdwONte1/bpx3/TFVVK7+lra66AtgCfA54BtjSVmNd0F5j33LbK0ka3Wp8ue9XgEeS/CrwBeDB\nVv4g8NtJ5oBjDEKAqjqc5FHgOeAEcGdV/QAgyV3AAeA8YG9VHV6F9kqSOq1IaFTVLDDbtl9ksPLp\n9Dp/Bbx3kfM/BnxsgfL9wP6VaKMkaXR+I1yS1M3QkCR1MzQkSd0MDUlSN2+NLqmbtzeRIw1JUjdD\nQ5LUzdCQJHUzNCRJ3bwQLjX+3W5paY40JEndHGloQ3MJqXR2DA1JwunJXk5PSZK6GRqSpG5OT0k6\nI6/7aJihIU0o59g1iQwNaQH+wJYW5jUNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN1dPSdJpXD23OEca\nkqRujjSkJfiNaOmHHGlIkro50pD0IxxdaTGONCRJ3QwNSVI3Q0OS1M1rGtIa4PcGNCkcaUiSuhka\nkqRuyw6NJJcl+WyS55IcTvLLrfySJAeTvND+vbiVJ8n9SeaSfCnJO4eea0er/0KSHUPl70pyqJ1z\nf5KM8mal9eDy3U+84SGdS6Nc0zgB7KqqP03yk8DnkxwEbgeerKp7k+wGdgO/AtwAbGmPq4EHgKuT\nXALcDUwD1Z5nX1W92up8EHga2A9sAz49Qpu1Qa3nawLr+b1p8iw7NKrqZeDltv1/kzwPbAK2AzOt\n2kPALIPQ2A48XFUFPJXkoiSXtroHq+oYQAuebUlmgbdU1VOt/GHgZgwNjcjfznU2DOU3WpFrGkku\nB/4BgxHBVAsUgG8AU217E/DS0GlHW9mZyo8uUC5JGpORl9wm+evA/wL+TVV9Z/iyQ1VVkhr1NTra\nsBPYCTA1NcXs7Oxqv+RYHT9+fN2/x+VarG92bT1x7hszBmf6XCz1uTk0/9qp7V1bV7BRa8DUhX2f\nEf/fjRgaSf4ag8D4nar6/Vb8zSSXVtXLbfrplVY+D1w2dPrmVjbPD6ezTpbPtvLNC9T/EVW1B9gD\nMD09XTMzMwtVWzdmZ2dZ7+9xuRbrm9s3yJTUkVtnFj221Odmo/TRQnZtPcFvHFr6x+GZ+nejGGX1\nVIAHgeer6j8PHdoHnFwBtQN4fKj8traK6hrgtTaNdQC4LsnFbaXVdcCBduw7Sa5pr3Xb0HNJksZg\nlJHGu4FfAg4l+WIr+/fAvcCjSe4Avg68rx3bD9wIzAHfBT4AUFXHktwDPNPqffTkRXHgQ8CngAsZ\nXAD3Irh0Bl601WobZfXU/wEW+97EtQvUL+DORZ5rL7B3gfJngXcst43a2FwlJa08vxEuSepmaEiS\nuhkakqRu3hpd2sC87qOzZWhI69TpgbBr6wlu3/2Eq6o0EkNDkjq5pNnQkDYcp6Q0CkND68qh+dc2\n9O0wpNXm6ilJUjdDQ5LUzdCQJHXzmobWvOELuxvt70BI55ojDUlSN0NDktTN0JAkdTM0JEndDA1J\nUjdXT2lN8lYY0ngYGpK0DBv15oWGhtYMRxfS+BkamliGhDR5DA1NFINCmmyunpIkdTM0JEndnJ7S\nWDgNJa1NjjQkSd0MDUlSN6enJGlEG+mLfoaGzhmvY0hrn6GhVWVQSOuLoaEVZ1BI65ehoWXbSPO4\nkgYMDUlaQev9lylDQ2dlsaknp6SkjcHQ0IIMAUkLmfjQSLIN+DhwHvDJqrp3zE1atwwKaWWtx6mq\niQ6NJOcBnwD+KXAUeCbJvqp6brwtWz8MCklnY6JDA7gKmKuqFwGSPAJsBzZkaJz8Ab9r6wlmFiiX\nNLnWy6hj0kNjE/DS0P5R4OoxtWVFnekH/fAHygvP0vqzlgMkVTXuNiwqyXuAbVX1L9v+LwFXV9Vd\np9XbCexsuz8LfOWcNvTc+2ngL8fdiAll3yzOvlmcfQN/u6retlSlSR9pzAOXDe1vbmVvUFV7gD3n\nqlHjluTZqpoedzsmkX2zOPtmcfZNv0m/NfozwJYkVyS5ALgF2DfmNknShjXRI42qOpHkLuAAgyW3\ne6vq8JibJUkb1kSHBkBV7Qf2j7sdE2bDTMUtg32zOPtmcfZNp4m+EC5JmiyTfk1DkjRBDI01JMl7\nkxxO8nqS6dOOfSTJXJKvJLl+XG2cBEn+Y5L5JF9sjxvH3aZxSrKtfS7mkuwed3smSZIjSQ61z8mz\n427PWjDx1zT0Bl8G/jnw34YLk1zJYGXZ24G/Bfxxkr9TVT84902cGPdV1a+PuxHj5q14uvyTqtro\n39Ho5khjDamq56tqoS8ubgceqarvVdXXgDkGt2CRTt2Kp6q+D5y8FY+0LIbG+rDQ7VY2jaktk+Ku\nJF9KsjfJxeNuzBj52TizAv53ks+3O0toCU5PTZgkfwz8zQUO/Yeqevxct2dSnamfgAeAexj8QLgH\n+A3gX5y71mkN+UdVNZ/kbwAHk/xZVf3JuBs1yQyNCVNVP7+M07put7Ke9PZTkt8C/nCVmzPJNtxn\n42xU1Xz795Ukf8BgOs/QOAOnp9aHfcAtSd6c5ApgC/C5MbdpbJJcOrT7iwwWEGxU3opnEUl+IslP\nntwGrmNjf1a6ONJYQ5L8IvBfgbcBTyT5YlVdX1WHkzzK4O+MnADu3OArp/5Tkr/PYHrqCPCvxtuc\n8fFWPGc0BfxBEhj8LPwfVfVH423S5PMb4ZKkbk5PSZK6GRqSpG6GhiSpm6EhSepmaEiSuhkakqRu\nhoYkqZuhIUnq9v8BYuvezWR1bPsAAAAASUVORK5CYII=\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY8AAAD8CAYAAACPWyg8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFDVJREFUeJzt3W+MXFd5x/HvQyAQpUAcEraWHcmRsFoZLAJZJa7gxZaU\nxEmqOlQQBUXEoSmuRCJRYak4baW0BKSARFOoIJJLrDhVIVi0USziYNzgEeoLkz9A8z/KNhjFKycW\ncXDYIIKWPn0xx2ayO7s7Z727d3bn+5FGvnPm3HvOHI/3t+fcO9eRmUiSVON1TXdAkrT0GB6SpGqG\nhySpmuEhSapmeEiSqhkekqRqhockqZrhIUmqZnhIkqq9vpdKEXEQ+CXwW2AiM4cj4kzgW8Aa4CBw\nZWa+FBEBfBm4DPgVcG1m/qgcZzPw9+Wwn8vMnaX8fOAO4DRgD/CpzMzp2pipr2eddVauWbOml7e1\nJL3yyiucfvrpTXej7zguUzkmUzkm3b3yyis89dRTP8/Ms3veKTNnfdD+wX3WpLIvAtvK9jbgC2X7\nMuA+IIANwA9L+ZnAs+XPFWV7RXntgVI3yr6XztTGTI/zzz8/l7P9+/c33YW+5LhM5ZhM5Zh0t3//\n/gQeyh7y4PjjZJatNgE7y/ZO4IqO8jtLnw4AZ0TESuASYF9mHs327GEfsLG89pbMPJCZCdw56Vjd\n2pAkNajX8EjgexHxcERsKWVDmXm4bD8PDJXtVcBzHfseKmUzlR/qUj5TG5KkBvV0zgN4f2aORcTb\ngX0R8VTni5mZEbGgt+edqY0SaFsAhoaGaLVaC9mVRo2Pjy/r9zdXjstUjslUjkl34+Pj1fv0FB6Z\nOVb+PBIRdwMXAC9ExMrMPFyWno6U6mPAOR27ry5lY8DIpPJWKV/dpT4ztDG5f9uB7QDDw8M5MjLS\nrdqy0Gq1WM7vb64cl6kck6kck+7mEqizLltFxOkR8ebj28DFwGPAbmBzqbYZuKds7wauibYNwLGy\n9LQXuDgiVkTEinKcveW1lyNiQ7lS65pJx+rWhiSpQb3MPIaAu9s/13k98I3M/G5EPAjsiojrgJ8B\nV5b6e2hfcTVK+1LdjwNk5tGIuBl4sNT7bGYeLduf5HeX6t5XHgC3TNOGJKlBs4ZHZj4LvLtL+YvA\nRV3KE7h+mmPtAHZ0KX8IeFevbUiSmuU3zCVJ1QwPSVK1Xi/VlaRFtWbbvSe2D95yeYM9UTfOPCRJ\n1Zx5SOp7zkL6j+EhqW90hoT6m8tWkqRqhockqZrhIUmqZnhIkqoZHpKkaoaHJKma4SFJqmZ4SJKq\nGR6SpGp+w1xSo/xW+dLkzEOSVM3wkCRVMzwkSdUMD0lSNcNDklTN8JAkVfNSXUlLiv+rYH8wPCQt\nOr/bsfS5bCVJqmZ4SJKqGR6SpGqGhySpmuEhSapmeEiSqhkekqRqhockqZrhIUmqZnhIkqr1HB4R\ncUpE/DgivlOenxsRP4yI0Yj4VkScWsrfWJ6PltfXdBzjxlL+dERc0lG+sZSNRsS2jvKubUhaetZs\nu/fEQ0tfzczjU8CTHc+/ANyame8AXgKuK+XXAS+V8ltLPSJiHXAV8E5gI/C1EkinAF8FLgXWAR8t\ndWdqQ5LUoJ7CIyJWA5cDXy/PA/gA8O1SZSdwRdneVJ5TXr+o1N8E3JWZr2bmT4FR4ILyGM3MZzPz\nN8BdwKZZ2pAkNajXu+r+M/A3wJvL87cBv8jMifL8ELCqbK8CngPIzImIOFbqrwIOdByzc5/nJpVf\nOEsbrxERW4AtAENDQ7RarR7f1tIzPj6+rN/fXDkuU/XbmGxdPzF7pUq176/fxqRfjI+PV+8za3hE\nxJ8CRzLz4YgYmUO/Flxmbge2AwwPD+fIyEizHVpArVaL5fz+5spxmarfxuTaBTjXcfDqkar6/TYm\n/WIugdrLzON9wJ9FxGXAm4C3AF8GzoiI15eZwWpgrNQfA84BDkXE64G3Ai92lB/XuU+38hdnaEOS\n1KBZz3lk5o2ZuToz19A+4f39zLwa2A98uFTbDNxTtneX55TXv5+ZWcqvKldjnQusBR4AHgTWliur\nTi1t7C77TNeGJKlBJ/M9j88An46IUdrnJ24v5bcDbyvlnwa2AWTm48Au4Angu8D1mfnbMqu4AdhL\n+2quXaXuTG1IkhpU9d/QZmYLaJXtZ2lfKTW5zq+Bj0yz/+eBz3cp3wPs6VLetQ1JUrP8hrkkqVrV\nzEOS+knnt9UP3nJ5gz0ZPIaHtID84ablymUrSVI1w0OSVM1lK2keuDylQWN4SFow3n59+XLZSpJU\nzfCQJFUzPCRJ1QwPSVI1w0OSVM3wkCRVMzwkSdUMD0lSNcNDklTN8JAkVTM8JEnVDA9JUjVvjCjN\nkTf90yBz5iFJqmZ4SJKqGR6SpGqGhySpmuEhSarm1VaS5pVXoQ0Gw0PSsjA5tA7ecnlDPRkMhoc0\nz/zNW4PAcx6SpGqGhySpmuEhSapmeEiSqhkekqRqhockqdqs4RERb4qIByLifyLi8Yj4x1J+bkT8\nMCJGI+JbEXFqKX9jeT5aXl/TcawbS/nTEXFJR/nGUjYaEds6yru2IUlqVi8zj1eBD2Tmu4HzgI0R\nsQH4AnBrZr4DeAm4rtS/DniplN9a6hER64CrgHcCG4GvRcQpEXEK8FXgUmAd8NFSlxnakBqxZtu9\nJx7SIJs1PLJtvDx9Q3kk8AHg26V8J3BF2d5UnlNevygiopTflZmvZuZPgVHggvIYzcxnM/M3wF3A\nprLPdG1IkhrU0zfMy+zgYeAdtGcJ/wv8IjMnSpVDwKqyvQp4DiAzJyLiGPC2Un6g47Cd+zw3qfzC\nss90bUzu3xZgC8DQ0BCtVquXt7UkjY+PL+v3N1eLNS5b10/MXmkai/331tRn5WTGaD51e+/+++lu\nfHx89kqT9BQemflb4LyIOAO4G/jD6pYWUGZuB7YDDA8P58jISLMdWkCtVovl/P7marHG5dqTWK46\nePXI/HWkB019Vk5mjOZTt/H23093cwnUqqutMvMXwH7gj4AzIuJ4+KwGxsr2GHAOQHn9rcCLneWT\n9pmu/MUZ2pAkNaiXq63OLjMOIuI04IPAk7RD5MOl2mbgnrK9uzynvP79zMxSflW5GutcYC3wAPAg\nsLZcWXUq7ZPqu8s+07UhSWpQL8tWK4Gd5bzH64BdmfmdiHgCuCsiPgf8GLi91L8d+LeIGAWO0g4D\nMvPxiNgFPAFMANeX5TAi4gZgL3AKsCMzHy/H+sw0bUiSGjRreGTmI8B7upQ/S/tKqcnlvwY+Ms2x\nPg98vkv5HmBPr21Ikprl/+chzcLvdEhTGR7SIukMIf+XOy113ttKklTNmYekk+bS3uBx5iFJqubM\nQ2qA5z+01BkekpYlA3phuWwlSapmeEiSqhkekqRqhockqZrhIUmqZnhIkqp5qa7UsKV6SanfKh9s\nzjwkSdUMD0lSNcNDklTN8JAkVTM8JEnVDA9JUjXDQ5JUzfCQJFUzPCRJ1QwPSVI1b08iqWfekkTH\nOfOQJFUzPCRJ1QwPSVI1z3lIWvaOn6vZun6CkWa7smw485AkVTM8JEnVDA9JUjXPeWigLdX/AlZq\n2qwzj4g4JyL2R8QTEfF4RHyqlJ8ZEfsi4pny54pSHhHxlYgYjYhHIuK9HcfaXOo/ExGbO8rPj4hH\nyz5fiYiYqQ1JUrN6WbaaALZm5jpgA3B9RKwDtgH3Z+Za4P7yHOBSYG15bAFug3YQADcBFwIXADd1\nhMFtwCc69ttYyqdrQ5LUoFnDIzMPZ+aPyvYvgSeBVcAmYGepthO4omxvAu7MtgPAGRGxErgE2JeZ\nRzPzJWAfsLG89pbMPJCZCdw56Vjd2pAkNajqhHlErAHeA/wQGMrMw+Wl54Ghsr0KeK5jt0OlbKby\nQ13KmaENSVKDej5hHhG/B/wH8NeZ+XI5LQFAZmZE5AL0r6c2ImIL7SUyhoaGaLVaC9mVRo2Pjy/r\n9zdXcx2XresnTmxPt39nnYU2n3+38/VZeXTs2IntretP+nCNGjptfsd4uRgfH6/ep6fwiIg30A6O\nf8/M/yzFL0TEysw8XJaejpTyMeCcjt1Xl7IxeM2XO1cDrVK+ukv9mdp4jczcDmwHGB4ezpGRkW7V\nloVWq8Vyfn9zNddxubbzaquru+9/7SLeSXa6PszFfH1WFvP9L7St6ye40n8/U8wlUHu52iqA24En\nM/OfOl7aDRy/YmozcE9H+TXlqqsNwLGy9LQXuDgiVpQT5RcDe8trL0fEhtLWNZOO1a0NSVKDepl5\nvA/4GPBoRPyklP0tcAuwKyKuA34GXFle2wNcBowCvwI+DpCZRyPiZuDBUu+zmXm0bH8SuAM4Dbiv\nPJihDUlSg2YNj8z8byCmefmiLvUTuH6aY+0AdnQpfwh4V5fyF7u1IS1XfmlRS4W3J5EkVTM8JEnV\nDA9JUjXDQ5JUzbvqSoUnq6XeOfOQJFVz5qGBs2YZfWNa9Zxhzg/DQ9IUBqxm47KVJKma4SFJqmZ4\nSJKqec5D6sI1f2lmzjwkSdUMD0lSNcNDklTNcx5Sn/LLbOpnzjwkSdWceWggePWUNL+ceUiSqjnz\nkDSwPK80d848JEnVnHlIS4C/IavfGB6SAC8qUB2XrSRJ1QwPSVI1w0OSVM1zHtISM/nchCfQ1QRn\nHpKkaoaHJKma4SFJquY5D0nCL2LWMjykAeYXAzVXLltJkqo589Cy5W/V0sKZdeYRETsi4khEPNZR\ndmZE7IuIZ8qfK0p5RMRXImI0Ih6JiPd27LO51H8mIjZ3lJ8fEY+Wfb4SETFTG5Kk5vWybHUHsHFS\n2Tbg/sxcC9xfngNcCqwtjy3AbdAOAuAm4ELgAuCmjjC4DfhEx34bZ2lDUoc12+498ZAWy6zhkZk/\nAI5OKt4E7CzbO4ErOsrvzLYDwBkRsRK4BNiXmUcz8yVgH7CxvPaWzDyQmQncOelY3dqQJDVsrifM\nhzLzcNl+Hhgq26uA5zrqHSplM5Uf6lI+UxuSpIad9AnzzMyIyPnozFzbiIgttJfJGBoaotVqLWR3\nGjU+Pr6s399cdRuXresnmulMgzrHoJfPyqCN0dBpvb3nQfs3Nj4+Xr3PXMPjhYhYmZmHy9LTkVI+\nBpzTUW91KRsDRiaVt0r56i71Z2pjiszcDmwHGB4ezpGRkemqLnmtVovl/P7m6vi4vHbdf/AuJjx4\n9ciJ7V4+K9cO2HmSresn+NKjs38uOsdxEMwlLOe6bLUbOH7F1Gbgno7ya8pVVxuAY2XpaS9wcUSs\nKCfKLwb2ltdejogN5SqrayYdq1sbkqSGzRrBEfFN2rOGsyLiEO2rpm4BdkXEdcDPgCtL9T3AZcAo\n8Cvg4wCZeTQibgYeLPU+m5nHT8J/kvYVXacB95UHM7QhaRqdM687Np4+ax1prmYNj8z86DQvXdSl\nbgLXT3OcHcCOLuUPAe/qUv5itzYk9ebRsWMnlqW8V1Md73M1O29PIkmqZnhoyVuz7V4eHTvmcoy0\niAwPSVK1wbuWURpAzso035x5SJKqGR6SpGqGhySpmuc8tCS5hi81y5mHJKma4SFJqmZ4SJKqec5D\nkmbgfa66Mzy0ZHiSXOofhof6lmEh9S/PeUiSqhkekqRqhockqZrhIUmq5glzNc4T49LSY3hIUo/8\nzsfvGB5qhLMNaWnznIckqZrhIUmq5rKVFo1LVdLy4cxDklTN8JAkVXPZSgvKpSotV4N+2a7hoXln\nYEjLn8tWkqRqzjw0Z84wpMFleKiKgSFNNYjnPwwPzcrAkDSZ4aETBvG3J0lzY3hI0jwalF/C+j48\nImIj8GXgFODrmXlLw11aVqZbknKpStJM+jo8IuIU4KvAB4FDwIMRsTszn2i2Z0uPYSAtvuU8C+nr\n8AAuAEYz81mAiLgL2AQMZHis2XYvW9dPcO22e1/zQTQYpP633IKk38NjFfBcx/NDwIUN9WVenewP\nfANDWrqWQ5BEZjbdh2lFxIeBjZn5l+X5x4ALM/OGSfW2AFvK0z8Anl7Uji6us4CfN92JPuS4TOWY\nTOWYdHcWcHpmnt3rDv0+8xgDzul4vrqUvUZmbge2L1anmhQRD2XmcNP96DeOy1SOyVSOSXdlXNbU\n7NPv97Z6EFgbEedGxKnAVcDuhvskSQOvr2cemTkRETcAe2lfqrsjMx9vuFuSNPD6OjwAMnMPsKfp\nfvSRgViemwPHZSrHZCrHpLvqcenrE+aSpP7U7+c8JEl9yPBYIiLiIxHxeET8X0QMT3rtxogYjYin\nI+KSpvrYpIj4h4gYi4iflMdlTfepKRGxsXwWRiNiW9P96RcRcTAiHi2fj4ea7k8TImJHRByJiMc6\nys6MiH0R8Uz5c0UvxzI8lo7HgD8HftBZGBHraF+F9k5gI/C1cluXQXRrZp5XHgN5nqzjlj6XAuuA\nj5bPiNr+uHw+BvVy3Tto/5zotA24PzPXAveX57MyPJaIzHwyM7t9+XETcFdmvpqZPwVGad/WRYPp\nxC19MvM3wPFb+khk5g+Ao5OKNwE7y/ZO4IpejmV4LH3dbuGyqqG+NO2GiHikTM17mnovQ34eppfA\n9yLi4XJXCrUNZebhsv08MNTLTn1/qe4giYj/An6/y0t/l5n3LHZ/+s1M4wPcBtxM+wfEzcCXgL9Y\nvN5pCXh/Zo5FxNuBfRHxVPlNXEVmZkT0dAmu4dFHMvNP5rBbT7dwWQ56HZ+I+FfgOwvcnX41MJ+H\nWpk5Vv48EhF3017iMzzghYhYmZmHI2IlcKSXnVy2Wvp2A1dFxBsj4lxgLfBAw31adOVDf9yHaF9g\nMIi8pU8XEXF6RLz5+DZwMYP7GZlsN7C5bG8GelrlcOaxRETEh4B/Ac4G7o2In2TmJZn5eETsov1/\nnEwA12fmb5vsa0O+GBHn0V62Ogj8VbPdaYa39JnWEHB3RED75943MvO7zXZp8UXEN4ER4KyIOATc\nBNwC7IqI64CfAVf2dCy/YS5JquWylSSpmuEhSapmeEiSqhkekqRqhockqZrhIUmqZnhIkqoZHpKk\nav8PWhx2pwP674UAAAAASUVORK5CYII=\n", "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -59,1276 +121,654 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "metric = \"Internet\"\n", - "milan_data = milan_data[[\"Grid\", \"Country\", metric]]\n", - "milan_data = milan_data[np.isfinite(milan_data[metric])]\n", - "milan_data[metric] = np.log(milan_data[metric])\n", - "# data = data.head(1000000)" + "# milan_data_mod = milan_data.set_index(pd.DatetimeIndex(milan_data['Time']))\n", + "milan_data_mod = milan_data[[\"Grid\", \"Country\", \"Time\", metric]]\n", + "milan_data_mod = milan_data_mod[np.isfinite(milan_data_mod[metric])]\n", + "milan_data_mod[\"Time\"] = pd.to_datetime(milan_data_mod[\"Time\"], unit=\"ms\")" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Moments\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/site-packages/pandas/core/indexing.py:537: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " self.obj[item] = s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Log moments\n", + "Min/max\n", + "Group By\n", + " Grid Country Time min max lmin lmax \\\n", + "0 1 32 2013-11-01 12:00:00 0.001787 0.001787 -6.327044 -6.327044 \n", + "1 1 32 2013-11-02 00:00:00 0.026137 0.026137 -3.644387 -3.644387 \n", + "2 1 32 2013-11-02 18:00:00 0.026137 0.026137 -3.644387 -3.644387 \n", + "3 1 32 2013-11-03 06:00:00 0.001787 0.001787 -6.327044 -6.327044 \n", + "4 1 32 2013-11-03 12:00:00 0.001787 0.001787 -6.327044 -6.327044 \n", + "\n", + " m0 m1 m2 m3 m4 m5 lm0 \\\n", + "0 1 0.001787 0.000003 5.709522e-09 1.020469e-11 1.823894e-14 1 \n", + "1 1 0.026137 0.000683 1.785617e-05 4.667143e-07 1.219871e-08 1 \n", + "2 1 0.026137 0.000683 1.785617e-05 4.667143e-07 1.219871e-08 1 \n", + "3 1 0.001787 0.000003 5.709522e-09 1.020469e-11 1.823894e-14 1 \n", + "4 2 0.003575 0.000006 1.141904e-08 2.040937e-11 3.647788e-14 2 \n", + "\n", + " lm1 lm2 lm3 lm4 lm5 \n", + "0 -6.327044 40.031480 -253.280915 1602.519371 -10139.209808 \n", + "1 -3.644387 13.281557 -48.403137 176.399768 -642.869039 \n", + "2 -3.644387 13.281557 -48.403137 176.399768 -642.869039 \n", + "3 -6.327044 40.031480 -253.280915 1602.519371 -10139.209808 \n", + "4 -12.654087 80.062960 -506.561829 3205.038742 -20278.419617 \n", + "CPU times: user 28 s, sys: 27.3 s, total: 55.3 s\n", + "Wall time: 1min 31s\n" + ] + } + ], + "source": [ + "%%time\n", + "milan_cube = milan_data_mod.iloc[:12000000,:]\n", + "\n", + "print('Moments')\n", + "milan_cube.loc[:,'m0'] = 1\n", + "milan_cube.loc[:,'m1'] = milan_cube[metric]\n", + "milan_cube.loc[:,'m2'] = milan_cube['m1'] ** 2\n", + "milan_cube.loc[:,'m3'] = milan_cube['m1'] * milan_cube['m2']\n", + "milan_cube.loc[:,'m4'] = milan_cube['m2'] ** 2\n", + "milan_cube.loc[:,'m5'] = milan_cube['m2'] * milan_cube['m3']\n", + "\n", + "print('Log moments')\n", + "milan_cube.loc[:,'lm0'] = 1\n", + "milan_cube.loc[:,'lm1'] = np.log(milan_cube[metric])\n", + "milan_cube.loc[:,'lm2'] = milan_cube['lm1'] ** 2\n", + "milan_cube.loc[:,'lm3'] = milan_cube['lm1'] * milan_cube['lm2']\n", + "milan_cube.loc[:,'lm4'] = milan_cube['lm2'] ** 2\n", + "milan_cube.loc[:,'lm5'] = milan_cube['lm2'] * milan_cube['lm3']\n", + "\n", + "print('Min/max')\n", + "milan_cube.loc[:,'min'] = milan_cube['m1']\n", + "milan_cube.loc[:,'max'] = milan_cube['m1']\n", + "milan_cube.loc[:,'lmin'] = milan_cube['lm1']\n", + "milan_cube.loc[:,'lmax'] = milan_cube['lm1']\n", + "\n", + "print('Group By')\n", + "milan_cube = milan_cube.groupby([\"Grid\", \"Country\"] + [pd.Grouper(key=\"Time\", freq='6H')]).agg({\n", + " 'min': 'min',\n", + " 'max': 'max',\n", + " 'lmin': 'min',\n", + " 'lmax': 'max',\n", + " 'm0': 'sum',\n", + " 'm1': 'sum',\n", + " 'm2': 'sum',\n", + " 'm3': 'sum',\n", + " 'm4': 'sum',\n", + " 'm5': 'sum',\n", + " 'lm0': 'sum',\n", + " 'lm1': 'sum',\n", + " 'lm2': 'sum',\n", + " 'lm3': 'sum',\n", + " 'lm4': 'sum',\n", + " 'lm5': 'sum'\n", + "}).reset_index(col_level=0)\n", + "milan_cube.columns = milan_cube.columns.get_level_values(0)\n", + "print(milan_cube.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12000000" + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "milan_cube['m0'].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "# # Old cube creation\n", + "# milan_cube = milan_data_mod.groupby([\"Grid\", \"Country\"] + [pd.Grouper(key=\"Time\", freq='H')]).agg({metric: [\n", + "# 'min',\n", + "# 'max',\n", + "# log_min(),\n", + "# log_max(),\n", + "# moment(0),\n", + "# moment(1),\n", + "# moment(2),\n", + "# moment(3),\n", + "# moment(4),\n", + "# moment(5),\n", + "# log_moment(0),\n", + "# log_moment(1),\n", + "# log_moment(2),\n", + "# log_moment(3),\n", + "# log_moment(4),\n", + "# log_moment(5)\n", + "# ]}).reset_index(col_level=1)\n", + "# milan_cube.columns = milan_cube.columns.get_level_values(1)\n", + "# milan_cube" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Grid 10000\n", + "Country 133\n", + "Time 21\n", + "min 257642\n", + "max 369555\n", + "lmin 257556\n", + "lmax 369348\n", + "m0 36\n", + "m1 716164\n", + "m2 750658\n", + "m3 758154\n", + "m4 759665\n", + "m5 759261\n", + "lm0 36\n", + "lm1 756516\n", + "lm2 757599\n", + "lm3 757900\n", + "lm4 758072\n", + "lm5 758419\n", + "dtype: int64" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "milan_cube.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "milan_cube.to_csv('~/msketch/javamsketch/src/test/resources/milan_12M_6H_cubed.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2min 47s, sys: 37.5 s, total: 3min 24s\n", + "Wall time: 4min 43s\n" + ] + } + ], + "source": [ + "%%time\n", + "milan_grouped = milan_data_mod.iloc[:12000000,:]\n", + "milan_grouped = milan_grouped.groupby([\"Grid\", \"Country\"] + [pd.Grouper(key=\"Time\", freq='6H')])\n", + "milan_grouped = milan_grouped[metric].apply(list).reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Grid 1548982\n", + "Country 1548982\n", + "Time 1548982\n", + "Internet 1548982\n", + "dtype: int64" + ] + }, + "execution_count": 127, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "milan_grouped.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [], + "source": [ + "milan_grouped.to_csv('~/msketch/javamsketch/src/test/resources/milan_grouped_12M_6H.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "# t1 = milan_data[metric].quantile(0.99)\n", + "# t5 = milan_data[metric].quantile(0.95)\n", + "# t10 = milan_data[metric].quantile(0.90)\n", + "# milan_oracle = milan_data.groupby([\"Grid\", \"Country\"]).agg({metric: [\n", + "# 'count',\n", + "# outliers(t1, \"1\"),\n", + "# outliers(t5, \"5\"),\n", + "# outliers(t10, \"10\")\n", + "# ]}).reset_index(col_level=1)\n", + "# milan_oracle.columns = milan_oracle.columns.get_level_values(1)\n", + "# milan_oracle" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "milan_oracle.to_csv('lib/src/test/resources/milan_oracle_cubed.csv')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sample" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [], + "source": [ + "sample = pd.read_csv('~/msketch/javamsketch/src/test/resources/sample.csv')\n", + "sample = sample.groupby([\"location\", \"version\"])\n", + "sample = sample[\"usage\"].apply(list).reset_index()\n", + "sample.to_csv('~/msketch/javamsketch/src/test/resources/sample_grouped.csv')" ] }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 129, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", - "\n", "\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + "
GridCountryminmaxm0m1m2m3m4m5m6m7m8usagelatencylocationversion
0132-6.327044-6.3270441.0-6.32704440.031480-253.2809151602.519371-10139.20980864151.221753-4.058876e+052.568068e+06
1133-3.644387-3.6443875.0-18.22193666.407787-242.015683881.998838-3214.34519611714.318207-4.269151e+041.555844e+05
21391.5374672.987433144.0330.013292773.5542321848.8587984492.68869211070.83949227604.8278656.952730e+041.766393e+05
3146-6.327044-3.64438714.0-61.752045292.941493-1497.1550268174.075159-46985.529626280033.523428-1.708933e+061.058344e+07
4149-3.600852-3.6008523.0-10.80255538.898396-140.067348504.361727-1816.1317076539.620669-2.354820e+048.479358e+04
5232-6.989444-6.9894441.0-6.98944448.852333-341.4506632386.550423-16680.661453116588.555547-8.148892e+055.695623e+0668646.40787AUSv3
6233-3.598820-3.5988205.0-17.99410064.757529-233.050692838.707500-3018.35736410862.524979-3.909227e+041.406861e+0571046.49976UKv2
72391.5377712.990041144.0330.521887775.9847831857.7189214521.80606011161.60652027879.1220747.033982e+041.790136e+0556346.54429AUSv4
8246-6.989444-3.59882014.0-63.945978324.924388-1831.90403711223.616691-72759.360538488079.272146-3.337741e+062.306386e+0746346.61684AUSv4
9249-3.600852-3.6008523.0-10.80255538.898396-140.067348504.361727-1816.1317076539.620669-2.354820e+048.479358e+048446.69209AUSv3
10333-3.552494-3.5524945.0-17.76247063.101069-224.166170796.348978-2829.02498210050.094333-3.570290e+041.268343e+0578246.82514RUSv4
113391.5380942.992809144.0331.060698778.5645771867.1411614552.83122011258.51179428172.5489377.121076e+041.815637e+052346.88836AUSv3
12346-3.552494-3.55249410.0-35.524940126.202138-448.3323401592.697956-5658.04996420100.188667-7.140580e+042.536687e+0565447.17317CANv2
13349-3.600852-3.6008523.0-10.80255538.898396-140.067348504.361727-1816.1317076539.620669-2.354820e+048.479358e+0431247.86680RUSv4
14432-5.450309-5.4503091.0-5.45030929.705873-161.906200882.438887-4809.56499426213.617470-1.428723e+057.786984e+0568947.89642AUSv3
15433-3.789458-3.7894585.0-18.94729171.799965-272.0829601031.046984-3907.10937714805.827395-5.610606e+042.126116e+0533049.22464RUSv4
164391.5365872.979840144.0328.526707766.4754811823.1491704408.51271110809.42655026817.8402826.720492e+041.698787e+0533150.04735RUSv4
17446-5.450309-3.78945814.0-59.695819262.423421-1191.7907185591.849518-27052.478731134466.124669-6.837014e+053.540017e+06
18449-3.600852-3.6008523.0-10.80255538.898396-140.067348504.361727-1816.1317076539.620669-2.354820e+048.479358e+04
19532-5.314323-5.3143231.0-5.31432328.242029-150.087267797.612217-4238.76897422526.187576-1.197114e+056.361852e+05
20533-3.829188-3.8291885.0-19.14593873.313386-280.7307031074.970507-4116.26366115761.945491-6.035545e+042.311123e+05
215391.4468712.867586144.0315.103022706.3175271616.2021733762.9464958889.25808421256.2531215.135412e+041.251688e+05
\n", + "
" + ], + "text/plain": [ + " usage latency location version\n", + "686 46.40 787 AUS v3\n", + "710 46.49 976 UK v2\n", + "563 46.54 429 AUS v4\n", + "463 46.61 684 AUS v4\n", + "84 46.69 209 AUS v3\n", + "782 46.82 514 RUS v4\n", + "23 46.88 836 AUS v3\n", + "654 47.17 317 CAN v2\n", + "312 47.86 680 RUS v4\n", + "689 47.89 642 AUS v3\n", + "330 49.22 464 RUS v4\n", + "331 50.04 735 RUS v4" + ] + }, + "execution_count": 129, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sample = pd.read_csv('~/msketch/javamsketch/src/test/resources/sample.csv')\n", + "cutoff = sample[\"usage\"].quantile(0.99)\n", + "sample[sample[\"usage\"] >= 46.4].sort_values([\"usage\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
usagelatency
22546-5.314323-3.82918814.0-59.549167259.594889-1161.8104725340.389882-25187.603218121628.641284-5.995566e+053.006966e+06locationversion
23549-3.744196-3.7441963.0-11.23258942.057016-157.469718589.597519-2207.5687928265.570682-3.094792e+041.158751e+05AUSv3150150
24633-3.552494-3.5524945.0-17.76247063.101069-224.166170796.348978-2829.02498210050.094333-3.570290e+041.268343e+05v45050
256391.5380942.992809144.0331.060698778.5645771867.1411614552.83122011258.51179428172.5489377.121076e+041.815637e+05CANv15050
26646-3.552494-3.55249410.0-35.524940126.202138-448.3323401592.697956-5658.04996420100.188667-7.140580e+042.536687e+05v2150150
27649-3.600852-3.6008523.0-10.80255538.898396-140.067348504.361727-1816.1317076539.620669-2.354820e+048.479358e+04v32020
28733-3.552494-3.5524945.0-17.76247063.101069-224.166170796.348978-2829.02498210050.094333-3.570290e+041.268343e+05RUSv4200200
297391.5380942.992809144.0331.060698778.5645771867.1411614552.83122011258.51179428172.5489377.121076e+041.815637e+05UKv2100100
..........................................v3100100
132533999786-2.453469-2.4534691.0-2.4534696.019509-14.76867736.234488-88.900185218.113828-5.351355e+021.312938e+03
132534999788239-2.453469-2.4534692.0-4.90693812.039018-29.53735572.468977-177.800371436.227656-1.070271e+032.625876e+03
132535999833-2.453469-2.4534691.0-2.4534696.019509-14.76867736.234488-88.900185218.113828-5.351355e+021.312938e+03
132536999834-2.453469-2.4534692.0-4.90693812.039018-29.53735572.468977-177.800371436.227656-1.070271e+032.625876e+03
1325379998392.9455794.403509144.0524.7800661929.3742097154.11544926746.073877100782.539685382634.3992171.463212e+065.633865e+06
132538999841-2.453469-2.4534691.0-2.4534696.019509-14.76867736.234488-88.900185218.113828-5.351355e+021.312938e+03
132539999846-2.453469-2.4534694.0-9.81387524.078036-59.074709144.937953-355.600742872.455313-2.140542e+035.251753e+03
132540999849-2.453469-2.4534696.0-14.72081336.117054-88.612064217.406930-533.4011131308.682969-3.210813e+037.877629e+03
132541999886-2.453469-2.4534691.0-2.4534696.019509-14.76867736.234488-88.900185218.113828-5.351355e+021.312938e+03
132542999888239-2.453469-2.4534692.0-4.90693812.039018-29.53735572.468977-177.800371436.227656-1.070271e+032.625876e+03
132543999933-2.453469-1.3231284.0-8.62007619.499824-45.491191108.091651-259.552480626.934249-1.519298e+033.688524e+03
132544999934-4.219398-1.7443314.0-10.83893832.730342-109.398491396.846705-1525.7198956090.915444-2.488246e+041.030442e+05
1325459999392.3365244.146807144.0461.0384861501.3484304968.00755816687.96075256846.694098196182.2147336.852638e+052.420556e+06
132546999941-2.453469-2.4217404.0-9.71868923.613982-57.377915139.422981-338.795878823.296205-2.000730e+034.862226e+03
132547999943-2.421740-2.4217401.0-2.4217405.864824-14.20307934.396164-83.298564201.727459-4.885314e+021.183096e+03
132548999944-2.421740-2.4217401.0-2.4217405.864824-14.20307934.396164-83.298564201.727459-4.885314e+021.183096e+03
132549999946-4.219398-2.4534698.0-26.69146695.291302-359.5517961412.770269-5705.08943023444.075302-9.737918e+044.071014e+05
132550999949-4.219398-2.4534699.0-27.37900689.527003-313.9698791168.281167-4545.51762918237.397961-7.463979e+043.092649e+05
132551999986-2.453469-2.4534691.0-2.4534696.019509-14.76867736.234488-88.900185218.113828-5.351355e+021.312938e+03
132552999988239-2.453469-2.4534692.0-4.90693812.039018-29.53735572.468977-177.800371436.227656-1.070271e+032.625876e+03
1325531000033-2.453469-0.4704264.0-6.06197211.164573-22.59835848.405187-107.942676247.966919-5.819642e+021.386409e+03
1325541000034-2.914497-1.2233594.0-8.16036318.472290-45.218936116.688164-311.439652849.275503-2.348909e+036.560741e+03
13255510000392.1005004.289594144.0425.4271731283.2363483949.88945612400.18335939683.914847129402.7936314.297711e+051.453222e+06
1325561000041-2.453469-1.5690394.0-7.16058413.405154-26.35703954.417075-117.429363262.877207-6.053709e+021.423140e+03
1325571000043-1.569039-1.5690391.0-1.5690392.461882-3.8627876.060862-9.50972614.921126-2.341182e+013.673405e+01
1325581000044-1.569039-1.5690391.0-1.5690392.461882-3.8627876.060862-9.50972614.921126-2.341182e+013.673405e+01
1325591000046-2.914497-2.4534698.0-21.47186258.055200-158.101040433.549868-1196.7592123324.008890-9.285587e+032.607596e+04
1325601000049-2.914497-2.4534699.0-23.46430361.599927-162.881812433.865866-1164.2699663147.348152-8.569596e+032.349579e+04
1325611000086-2.453469-2.4534691.0-2.4534696.019509-14.76867736.234488-88.900185218.113828-5.351355e+021.312938e+03
1325621000088239-2.453469-2.4534692.0-4.90693812.039018-29.53735572.468977-177.800371436.227656-1.070271e+032.625876e+03USAv1200200
\n", - "

132563 rows × 13 columns

\n", "
" ], "text/plain": [ - " Grid Country min max m0 m1 m2 \\\n", - "0 1 32 -6.327044 -6.327044 1.0 -6.327044 40.031480 \n", - "1 1 33 -3.644387 -3.644387 5.0 -18.221936 66.407787 \n", - "2 1 39 1.537467 2.987433 144.0 330.013292 773.554232 \n", - "3 1 46 -6.327044 -3.644387 14.0 -61.752045 292.941493 \n", - "4 1 49 -3.600852 -3.600852 3.0 -10.802555 38.898396 \n", - "5 2 32 -6.989444 -6.989444 1.0 -6.989444 48.852333 \n", - "6 2 33 -3.598820 -3.598820 5.0 -17.994100 64.757529 \n", - "7 2 39 1.537771 2.990041 144.0 330.521887 775.984783 \n", - "8 2 46 -6.989444 -3.598820 14.0 -63.945978 324.924388 \n", - "9 2 49 -3.600852 -3.600852 3.0 -10.802555 38.898396 \n", - "10 3 33 -3.552494 -3.552494 5.0 -17.762470 63.101069 \n", - "11 3 39 1.538094 2.992809 144.0 331.060698 778.564577 \n", - "12 3 46 -3.552494 -3.552494 10.0 -35.524940 126.202138 \n", - "13 3 49 -3.600852 -3.600852 3.0 -10.802555 38.898396 \n", - "14 4 32 -5.450309 -5.450309 1.0 -5.450309 29.705873 \n", - "15 4 33 -3.789458 -3.789458 5.0 -18.947291 71.799965 \n", - "16 4 39 1.536587 2.979840 144.0 328.526707 766.475481 \n", - "17 4 46 -5.450309 -3.789458 14.0 -59.695819 262.423421 \n", - "18 4 49 -3.600852 -3.600852 3.0 -10.802555 38.898396 \n", - "19 5 32 -5.314323 -5.314323 1.0 -5.314323 28.242029 \n", - "20 5 33 -3.829188 -3.829188 5.0 -19.145938 73.313386 \n", - "21 5 39 1.446871 2.867586 144.0 315.103022 706.317527 \n", - "22 5 46 -5.314323 -3.829188 14.0 -59.549167 259.594889 \n", - "23 5 49 -3.744196 -3.744196 3.0 -11.232589 42.057016 \n", - "24 6 33 -3.552494 -3.552494 5.0 -17.762470 63.101069 \n", - "25 6 39 1.538094 2.992809 144.0 331.060698 778.564577 \n", - "26 6 46 -3.552494 -3.552494 10.0 -35.524940 126.202138 \n", - "27 6 49 -3.600852 -3.600852 3.0 -10.802555 38.898396 \n", - "28 7 33 -3.552494 -3.552494 5.0 -17.762470 63.101069 \n", - "29 7 39 1.538094 2.992809 144.0 331.060698 778.564577 \n", - "... ... ... ... ... ... ... ... \n", - "132533 9997 86 -2.453469 -2.453469 1.0 -2.453469 6.019509 \n", - "132534 9997 88239 -2.453469 -2.453469 2.0 -4.906938 12.039018 \n", - "132535 9998 33 -2.453469 -2.453469 1.0 -2.453469 6.019509 \n", - "132536 9998 34 -2.453469 -2.453469 2.0 -4.906938 12.039018 \n", - "132537 9998 39 2.945579 4.403509 144.0 524.780066 1929.374209 \n", - "132538 9998 41 -2.453469 -2.453469 1.0 -2.453469 6.019509 \n", - "132539 9998 46 -2.453469 -2.453469 4.0 -9.813875 24.078036 \n", - "132540 9998 49 -2.453469 -2.453469 6.0 -14.720813 36.117054 \n", - "132541 9998 86 -2.453469 -2.453469 1.0 -2.453469 6.019509 \n", - "132542 9998 88239 -2.453469 -2.453469 2.0 -4.906938 12.039018 \n", - "132543 9999 33 -2.453469 -1.323128 4.0 -8.620076 19.499824 \n", - "132544 9999 34 -4.219398 -1.744331 4.0 -10.838938 32.730342 \n", - "132545 9999 39 2.336524 4.146807 144.0 461.038486 1501.348430 \n", - "132546 9999 41 -2.453469 -2.421740 4.0 -9.718689 23.613982 \n", - "132547 9999 43 -2.421740 -2.421740 1.0 -2.421740 5.864824 \n", - "132548 9999 44 -2.421740 -2.421740 1.0 -2.421740 5.864824 \n", - "132549 9999 46 -4.219398 -2.453469 8.0 -26.691466 95.291302 \n", - "132550 9999 49 -4.219398 -2.453469 9.0 -27.379006 89.527003 \n", - "132551 9999 86 -2.453469 -2.453469 1.0 -2.453469 6.019509 \n", - "132552 9999 88239 -2.453469 -2.453469 2.0 -4.906938 12.039018 \n", - "132553 10000 33 -2.453469 -0.470426 4.0 -6.061972 11.164573 \n", - "132554 10000 34 -2.914497 -1.223359 4.0 -8.160363 18.472290 \n", - "132555 10000 39 2.100500 4.289594 144.0 425.427173 1283.236348 \n", - "132556 10000 41 -2.453469 -1.569039 4.0 -7.160584 13.405154 \n", - "132557 10000 43 -1.569039 -1.569039 1.0 -1.569039 2.461882 \n", - "132558 10000 44 -1.569039 -1.569039 1.0 -1.569039 2.461882 \n", - "132559 10000 46 -2.914497 -2.453469 8.0 -21.471862 58.055200 \n", - "132560 10000 49 -2.914497 -2.453469 9.0 -23.464303 61.599927 \n", - "132561 10000 86 -2.453469 -2.453469 1.0 -2.453469 6.019509 \n", - "132562 10000 88239 -2.453469 -2.453469 2.0 -4.906938 12.039018 \n", - "\n", - " m3 m4 m5 m6 m7 \\\n", - "0 -253.280915 1602.519371 -10139.209808 64151.221753 -4.058876e+05 \n", - "1 -242.015683 881.998838 -3214.345196 11714.318207 -4.269151e+04 \n", - "2 1848.858798 4492.688692 11070.839492 27604.827865 6.952730e+04 \n", - "3 -1497.155026 8174.075159 -46985.529626 280033.523428 -1.708933e+06 \n", - "4 -140.067348 504.361727 -1816.131707 6539.620669 -2.354820e+04 \n", - "5 -341.450663 2386.550423 -16680.661453 116588.555547 -8.148892e+05 \n", - "6 -233.050692 838.707500 -3018.357364 10862.524979 -3.909227e+04 \n", - "7 1857.718921 4521.806060 11161.606520 27879.122074 7.033982e+04 \n", - "8 -1831.904037 11223.616691 -72759.360538 488079.272146 -3.337741e+06 \n", - "9 -140.067348 504.361727 -1816.131707 6539.620669 -2.354820e+04 \n", - "10 -224.166170 796.348978 -2829.024982 10050.094333 -3.570290e+04 \n", - "11 1867.141161 4552.831220 11258.511794 28172.548937 7.121076e+04 \n", - "12 -448.332340 1592.697956 -5658.049964 20100.188667 -7.140580e+04 \n", - "13 -140.067348 504.361727 -1816.131707 6539.620669 -2.354820e+04 \n", - "14 -161.906200 882.438887 -4809.564994 26213.617470 -1.428723e+05 \n", - "15 -272.082960 1031.046984 -3907.109377 14805.827395 -5.610606e+04 \n", - "16 1823.149170 4408.512711 10809.426550 26817.840282 6.720492e+04 \n", - "17 -1191.790718 5591.849518 -27052.478731 134466.124669 -6.837014e+05 \n", - "18 -140.067348 504.361727 -1816.131707 6539.620669 -2.354820e+04 \n", - "19 -150.087267 797.612217 -4238.768974 22526.187576 -1.197114e+05 \n", - "20 -280.730703 1074.970507 -4116.263661 15761.945491 -6.035545e+04 \n", - "21 1616.202173 3762.946495 8889.258084 21256.253121 5.135412e+04 \n", - "22 -1161.810472 5340.389882 -25187.603218 121628.641284 -5.995566e+05 \n", - "23 -157.469718 589.597519 -2207.568792 8265.570682 -3.094792e+04 \n", - "24 -224.166170 796.348978 -2829.024982 10050.094333 -3.570290e+04 \n", - "25 1867.141161 4552.831220 11258.511794 28172.548937 7.121076e+04 \n", - "26 -448.332340 1592.697956 -5658.049964 20100.188667 -7.140580e+04 \n", - "27 -140.067348 504.361727 -1816.131707 6539.620669 -2.354820e+04 \n", - "28 -224.166170 796.348978 -2829.024982 10050.094333 -3.570290e+04 \n", - "29 1867.141161 4552.831220 11258.511794 28172.548937 7.121076e+04 \n", - "... ... ... ... ... ... \n", - "132533 -14.768677 36.234488 -88.900185 218.113828 -5.351355e+02 \n", - "132534 -29.537355 72.468977 -177.800371 436.227656 -1.070271e+03 \n", - "132535 -14.768677 36.234488 -88.900185 218.113828 -5.351355e+02 \n", - "132536 -29.537355 72.468977 -177.800371 436.227656 -1.070271e+03 \n", - "132537 7154.115449 26746.073877 100782.539685 382634.399217 1.463212e+06 \n", - "132538 -14.768677 36.234488 -88.900185 218.113828 -5.351355e+02 \n", - "132539 -59.074709 144.937953 -355.600742 872.455313 -2.140542e+03 \n", - "132540 -88.612064 217.406930 -533.401113 1308.682969 -3.210813e+03 \n", - "132541 -14.768677 36.234488 -88.900185 218.113828 -5.351355e+02 \n", - "132542 -29.537355 72.468977 -177.800371 436.227656 -1.070271e+03 \n", - "132543 -45.491191 108.091651 -259.552480 626.934249 -1.519298e+03 \n", - "132544 -109.398491 396.846705 -1525.719895 6090.915444 -2.488246e+04 \n", - "132545 4968.007558 16687.960752 56846.694098 196182.214733 6.852638e+05 \n", - "132546 -57.377915 139.422981 -338.795878 823.296205 -2.000730e+03 \n", - "132547 -14.203079 34.396164 -83.298564 201.727459 -4.885314e+02 \n", - "132548 -14.203079 34.396164 -83.298564 201.727459 -4.885314e+02 \n", - "132549 -359.551796 1412.770269 -5705.089430 23444.075302 -9.737918e+04 \n", - "132550 -313.969879 1168.281167 -4545.517629 18237.397961 -7.463979e+04 \n", - "132551 -14.768677 36.234488 -88.900185 218.113828 -5.351355e+02 \n", - "132552 -29.537355 72.468977 -177.800371 436.227656 -1.070271e+03 \n", - "132553 -22.598358 48.405187 -107.942676 247.966919 -5.819642e+02 \n", - "132554 -45.218936 116.688164 -311.439652 849.275503 -2.348909e+03 \n", - "132555 3949.889456 12400.183359 39683.914847 129402.793631 4.297711e+05 \n", - "132556 -26.357039 54.417075 -117.429363 262.877207 -6.053709e+02 \n", - "132557 -3.862787 6.060862 -9.509726 14.921126 -2.341182e+01 \n", - "132558 -3.862787 6.060862 -9.509726 14.921126 -2.341182e+01 \n", - "132559 -158.101040 433.549868 -1196.759212 3324.008890 -9.285587e+03 \n", - "132560 -162.881812 433.865866 -1164.269966 3147.348152 -8.569596e+03 \n", - "132561 -14.768677 36.234488 -88.900185 218.113828 -5.351355e+02 \n", - "132562 -29.537355 72.468977 -177.800371 436.227656 -1.070271e+03 \n", - "\n", - " m8 \n", - "0 2.568068e+06 \n", - "1 1.555844e+05 \n", - "2 1.766393e+05 \n", - "3 1.058344e+07 \n", - "4 8.479358e+04 \n", - "5 5.695623e+06 \n", - "6 1.406861e+05 \n", - "7 1.790136e+05 \n", - "8 2.306386e+07 \n", - "9 8.479358e+04 \n", - "10 1.268343e+05 \n", - "11 1.815637e+05 \n", - "12 2.536687e+05 \n", - "13 8.479358e+04 \n", - "14 7.786984e+05 \n", - "15 2.126116e+05 \n", - "16 1.698787e+05 \n", - "17 3.540017e+06 \n", - "18 8.479358e+04 \n", - "19 6.361852e+05 \n", - "20 2.311123e+05 \n", - "21 1.251688e+05 \n", - "22 3.006966e+06 \n", - "23 1.158751e+05 \n", - "24 1.268343e+05 \n", - "25 1.815637e+05 \n", - "26 2.536687e+05 \n", - "27 8.479358e+04 \n", - "28 1.268343e+05 \n", - "29 1.815637e+05 \n", - "... ... \n", - "132533 1.312938e+03 \n", - "132534 2.625876e+03 \n", - "132535 1.312938e+03 \n", - "132536 2.625876e+03 \n", - "132537 5.633865e+06 \n", - "132538 1.312938e+03 \n", - "132539 5.251753e+03 \n", - "132540 7.877629e+03 \n", - "132541 1.312938e+03 \n", - "132542 2.625876e+03 \n", - "132543 3.688524e+03 \n", - "132544 1.030442e+05 \n", - "132545 2.420556e+06 \n", - "132546 4.862226e+03 \n", - "132547 1.183096e+03 \n", - "132548 1.183096e+03 \n", - "132549 4.071014e+05 \n", - "132550 3.092649e+05 \n", - "132551 1.312938e+03 \n", - "132552 2.625876e+03 \n", - "132553 1.386409e+03 \n", - "132554 6.560741e+03 \n", - "132555 1.453222e+06 \n", - "132556 1.423140e+03 \n", - "132557 3.673405e+01 \n", - "132558 3.673405e+01 \n", - "132559 2.607596e+04 \n", - "132560 2.349579e+04 \n", - "132561 1.312938e+03 \n", - "132562 2.625876e+03 \n", - "\n", - "[132563 rows x 13 columns]" + " usage latency\n", + "location version \n", + "AUS v3 150 150\n", + " v4 50 50\n", + "CAN v1 50 50\n", + " v2 150 150\n", + " v3 20 20\n", + "RUS v4 200 200\n", + "UK v2 100 100\n", + " v3 100 100\n", + "USA v1 200 200" ] }, - "execution_count": 119, + "execution_count": 114, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "def moment(n):\n", - " def moment_(x):\n", - " return np.sum(np.power(x, n))\n", - " moment_.__name__ = 'm%s' % n\n", - " return moment_\n", - "\n", - "milan_cube = milan_data.groupby([\"Grid\", \"Country\"]).agg({metric: [\n", - " 'min',\n", - " 'max',\n", - " moment(0),\n", - " moment(1),\n", - " moment(2),\n", - " moment(3),\n", - " moment(4),\n", - " moment(5),\n", - " moment(6),\n", - " moment(7),\n", - " moment(8)\n", - "]}).reset_index(col_level=1)\n", - "milan_cube.columns = milan_cube.columns.get_level_values(1)\n", - "milan_cube" + "sample = pd.read_csv('~/msketch/javamsketch/src/test/resources/sample.csv')\n", + "sample.groupby([\"location\", \"version\"]).count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Wikipedia" ] }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 156, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "wiki_data = pd.read_json('~/Downloads/wikipedia-2015-09-12', lines=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 148, "metadata": { "collapsed": true }, "outputs": [], "source": [ - "milan_cube.to_csv('lib/src/test/resources/milan_moments_cubed.csv')" + "metric = \"delta\"\n", + "attributes = [\"channel\", \"countryName\", \"namespace\"]\n", + "wiki_data = wiki_data[attributes + [metric]]\n", + "wiki_data = wiki_data[(np.isfinite(wiki_data[metric])) & (wiki_data[metric] != 0)]\n", + "wiki_data[metric] = np.log(np.absolute(wiki_data[metric]))" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 149, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFf1JREFUeJzt3X+w3XWd3/Hna4msLK4GVnuHJkzDjBkdFqriHYi13bnK\nigEdwx+ug0MlWGr+EK3bYUZDOx2m/uiw03ZdaV07GckStlSWYXXICBoz0TNOZ4oCakFAyy2iJA3g\nGn5spKvN7rt/nE/wmO+9ueee3JtzLnk+Zs7c7/f9/Xy+9/PhhPO63x/nnFQVkiQN+o1xD0CSNHkM\nB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6Vo17AKN65StfWevWrRup789//nNO\nPfXUpR3QceYcJoNzmAzOYTj33XffX1XVq4Zpu2LDYd26ddx7770j9e31eszMzCztgI4z5zAZnMNk\ncA7DSfLjYdt6WkmS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktSxYt8hraW3\nbuudLyw/dv07xjgSSePmkYMkqcNwkCR1GA6SpI6hwiHJ6iS3J/lBkoeTvCnJ6Ul2J3mk/TyttU2S\nG5LMJrk/yXkD+9nc2j+SZPNA/Y1JHmh9bkiSpZ+qJGlYwx45fAb4alW9Fngd8DCwFdhTVeuBPW0d\n4GJgfXtsAT4HkOR04DrgAuB84LrDgdLafGCg38Zjm5Yk6VgsGA5JXgH8HnAjQFX9sqqeATYBO1qz\nHcClbXkTcHP13Q2sTnIG8HZgd1UdqKqngd3Axrbt5VV1d1UVcPPAviRJYzDMkcNZwE+BP0vy3SSf\nT3IqMFVV+1ubJ4CptrwGeHyg/95WO1p97xx1SdKYDPM+h1XAecCHq+pbST7Dr04hAVBVlaSWY4CD\nkmyhf6qKqakper3eSPs5ePDgyH0nxXLM4ZpzD72wfDz++/g8TAbnMBkmbQ7DhMNeYG9Vfaut304/\nHJ5MckZV7W+nhp5q2/cBZw70X9tq+4CZI+q9Vl87R/uOqtoGbAOYnp6uUb9Sz68UnNuVg2+Cu3xp\n9z0Xn4fJ4Bwmw6TNYcHTSlX1BPB4kte00oXAQ8BO4PAdR5uBO9ryTuCKdtfSBuDZdvppF3BRktPa\nheiLgF1t23NJNrS7lK4Y2JckaQyG/fiMDwO3JDkZeBR4P/1guS3JVcCPgfe0tncBlwCzwPOtLVV1\nIMkngHtau49X1YG2/EHgJuAU4CvtIUkak6HCoaq+B0zPsenCOdoWcPU8+9kObJ+jfi9wzjBjkSQt\nP98hLUnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1\nGA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUsdQ\n4ZDksSQPJPlekntb7fQku5M80n6e1upJckOS2ST3JzlvYD+bW/tHkmweqL+x7X+29c1ST1SSNLzF\nHDm8papeX1XTbX0rsKeq1gN72jrAxcD69tgCfA76YQJcB1wAnA9cdzhQWpsPDPTbOPKMJEnH7FhO\nK20CdrTlHcClA/Wbq+9uYHWSM4C3A7ur6kBVPQ3sBja2bS+vqrurqoCbB/YlSRqDYcOhgK8luS/J\nllabqqr9bfkJYKotrwEeH+i7t9WOVt87R12SNCarhmz3j6tqX5K/B+xO8oPBjVVVSWrph/frWjBt\nAZiamqLX6420n4MHD47cd1IsxxyuOffQC8vH47+Pz8NkcA6TYdLmMFQ4VNW+9vOpJF+if83gySRn\nVNX+dmroqdZ8H3DmQPe1rbYPmDmi3mv1tXO0n2sc24BtANPT0zUzMzNXswX1ej1G7TsplmMOV269\n84Xlxy5f2n3PxedhMjiHyTBpc1jwtFKSU5P89uFl4CLg+8BO4PAdR5uBO9ryTuCKdtfSBuDZdvpp\nF3BRktPaheiLgF1t23NJNrS7lK4Y2JckaQyGOXKYAr7U7i5dBfy3qvpqknuA25JcBfwYeE9rfxdw\nCTALPA+8H6CqDiT5BHBPa/fxqjrQlj8I3AScAnylPSRJY7JgOFTVo8Dr5qj/DLhwjnoBV8+zr+3A\n9jnq9wLnDDFeSdJx4DukJUkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaD\nJKnDcJAkdQz7fQ7SUa0b/Ljv698xxpFIWgoeOUiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6S\npA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHUOHQ5KTknw3yZfb+llJvpVkNslfJDm51X+zrc+27esG\n9nFtq/8wydsH6htbbTbJ1qWbniRpFIs5cvgI8PDA+h8Bn66qVwNPA1e1+lXA063+6daOJGcDlwG/\nC2wE/rQFzknAZ4GLgbOB97a2kqQxGSockqwF3gF8vq0HeCtwe2uyA7i0LW9q67TtF7b2m4Bbq+oX\nVfUjYBY4vz1mq+rRqvolcGtrK0kak2GPHP4E+Cjwd239d4BnqupQW98LrGnLa4DHAdr2Z1v7F+pH\n9JmvLkkakwW/7CfJO4Gnquq+JDPLP6SjjmULsAVgamqKXq830n4OHjw4ct9JsRxzuObcQy8sL3bf\no/T1eZgMzmEyTNochvkmuDcD70pyCfBS4OXAZ4DVSVa1o4O1wL7Wfh9wJrA3ySrgFcDPBuqHDfaZ\nr/5rqmobsA1genq6ZmZmhhh+V6/XY9S+k2I55nDl4Le5Xb64fY/S1+dhMjiHyTBpc1jwtFJVXVtV\na6tqHf0Lyl+vqsuBbwDvbs02A3e05Z1tnbb961VVrX5Zu5vpLGA98G3gHmB9u/vp5PY7di7J7CRJ\nIzmW75D+GHBrkk8C3wVubPUbgT9PMgscoP9iT1U9mOQ24CHgEHB1Vf0tQJIPAbuAk4DtVfXgMYxL\nknSMFhUOVdUDem35Ufp3Gh3Z5m+AP5in/6eAT81Rvwu4azFjkSQtH98hLUnqMBwkSR2GgySpw3CQ\nJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lS\nh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqWDAckrw0ybeT/M8kDyb5\nt61+VpJvJZlN8hdJTm7132zrs237uoF9XdvqP0zy9oH6xlabTbJ16acpSVqMYY4cfgG8tapeB7we\n2JhkA/BHwKer6tXA08BVrf1VwNOt/unWjiRnA5cBvwtsBP40yUlJTgI+C1wMnA28t7WVJI3JguFQ\nfQfb6kvao4C3Are3+g7g0ra8qa3Ttl+YJK1+a1X9oqp+BMwC57fHbFU9WlW/BG5tbSVJY7JqmEbt\nr/v7gFfT/yv/fwPPVNWh1mQvsKYtrwEeB6iqQ0meBX6n1e8e2O1gn8ePqF8wzzi2AFsApqam6PV6\nwwy/4+DBgyP3nRTLMYdrzj30wvJi9z1KX5+HyeAcJsOkzWGocKiqvwVen2Q18CXgtcs6qvnHsQ3Y\nBjA9PV0zMzMj7afX6zFq30mxHHO4cuudLyw/dvni9j1KX5+HyeAcJsOkzWFRdytV1TPAN4A3AauT\nHA6XtcC+trwPOBOgbX8F8LPB+hF95qtLksZkmLuVXtWOGEhyCvA24GH6IfHu1mwzcEdb3tnWadu/\nXlXV6pe1u5nOAtYD3wbuAda3u59Opn/ReudSTE6SNJphTiudAexo1x1+A7itqr6c5CHg1iSfBL4L\n3Nja3wj8eZJZ4AD9F3uq6sEktwEPAYeAq9vpKpJ8CNgFnARsr6oHl2yGkqRFWzAcqup+4A1z1B+l\nf6fRkfW/Af5gnn19CvjUHPW7gLuGGK8k6TjwHdKSpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaD\nJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiS\nOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1LBgOSc5M8o0kDyV5MMlHWv30JLuTPNJ+ntbqSXJDktkk\n9yc5b2Bfm1v7R5JsHqi/MckDrc8NSbIck5UkDWeYI4dDwDVVdTawAbg6ydnAVmBPVa0H9rR1gIuB\n9e2xBfgc9MMEuA64ADgfuO5woLQ2Hxjot/HYpyZJGtWC4VBV+6vqO235r4GHgTXAJmBHa7YDuLQt\nbwJurr67gdVJzgDeDuyuqgNV9TSwG9jYtr28qu6uqgJuHtiXJGkM0n89HrJxsg74JnAO8JOqWt3q\nAZ6uqtVJvgxcX1X/vW3bA3wMmAFeWlWfbPV/A/xfoNfa/36r/xPgY1X1zjl+/xb6RyNMTU298dZb\nb138jIGDBw/yspe9bKS+k2I55vDAvmdfWD53zSuWva/Pw2RwDpPheMzhLW95y31VNT1M21XD7jTJ\ny4C/BP6wqp4bvCxQVZVk+JQZUVVtA7YBTE9P18zMzEj76fV6jNp3UizHHK7ceucLy49dvrh9j9LX\n52EyOIfJMGlzGOpupSQvoR8Mt1TVF1v5yXZKiPbzqVbfB5w50H1tqx2tvnaOuiRpTIa5WynAjcDD\nVfXHA5t2AofvONoM3DFQv6LdtbQBeLaq9gO7gIuSnNYuRF8E7Grbnkuyof2uKwb2JUkag2FOK70Z\neB/wQJLvtdq/Aq4HbktyFfBj4D1t213AJcAs8DzwfoCqOpDkE8A9rd3Hq+pAW/4gcBNwCvCV9pAk\njcmC4dAuLM/3voML52hfwNXz7Gs7sH2O+r30L3JLkiaA75CWJHUYDpKkDsNBktRhOEiSOgwHSVKH\n4SBJ6jAcJEkdJ2Q4PLDvWdZtvZN1A58HJEn6lRMyHCRJR2c4SJI6DAdJUofhIEnqMBwkSR2GgySp\nw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6SpI4FwyHJ9iRPJfn+\nQO30JLuTPNJ+ntbqSXJDktkk9yc5b6DP5tb+kSSbB+pvTPJA63NDkiz1JHV8Hf4iJb9MSVq5hjly\nuAnYeERtK7CnqtYDe9o6wMXA+vbYAnwO+mECXAdcAJwPXHc4UFqbDwz0O/J3SZKOs1ULNaiqbyZZ\nd0R5EzDTlncAPeBjrX5zVRVwd5LVSc5obXdX1QGAJLuBjUl6wMur6u5Wvxm4FPjKsUxqOQ3+NfzY\n9e8Y40gkafmMes1hqqr2t+UngKm2vAZ4fKDd3lY7Wn3vHHVJ0hgteOSwkKqqJLUUg1lIki30T1cx\nNTVFr9cbaT9Tp8A15x4CWPQ+Dvcbpe9SOnjw4JL//mOZ22DfQUfbz3LM4XhzDpPBOSy9UcPhySRn\nVNX+dtroqVbfB5w50G5tq+3jV6ehDtd7rb52jvZzqqptwDaA6enpmpmZma/pUf2nW+7gPz7Qn/pj\nly9uH1cOnlZaZN+l1Ov1GHX+8zmWuV05z8Xno+1nOeZwvDmHyeAclt6op5V2AofvONoM3DFQv6Ld\ntbQBeLadftoFXJTktHYh+iJgV9v2XJIN7S6lKwb2JUkakwWPHJJ8gf5f/a9Mspf+XUfXA7cluQr4\nMfCe1vwu4BJgFngeeD9AVR1I8gngntbu44cvTgMfpH9H1Cn0L0RP7MXolcAL5pKWwjB3K713nk0X\nztG2gKvn2c92YPsc9XuBcxYahyaP72OQXrx8h7QkqcNwkCR1HPOtrBqN1wYkTTLDQceNgSitHJ5W\nWoHWbb2TB/Y9uyI+3G6+D+EbnIOkyeORg+bkX/nSic1wOAEt9oXfoJBOPIbDMlhJL6ae1pE0F8NB\ni2KYSCcGw2GCLeURyCS/qK+kIy3pROHdSpKkDo8cNFHmO4pYrqMoj1SkuRkOmljznQobJkCO3CZp\ncQwHrWhHu5YyyddZpElnOEwAT3OMzzAB4nOiE5Hh8CLgX8jLa77/voaGXsy8W0kakZ8PpRczjxxW\niFFegHzROn48NagXG48cJEkdHjlIS8yjCL0YGA7SMvJuKK1UhoM0Zou9NmSY6HgwHKQV5nh8xIhk\nOEgrmHekablMTDgk2Qh8BjgJ+HxVXT/mIUkr1tGOIpbqCMMjlRe3iQiHJCcBnwXeBuwF7kmys6oe\nGu/IpJVvoc+fuubcQ1y59U5f4PVrJiIcgPOB2ap6FCDJrcAmwHCQjhNPUWnQpITDGuDxgfW9wAVj\nGoukRVqOYPFIZrxSVeMeA0neDWysqn/e1t8HXFBVHzqi3RZgS1t9DfDDEX/lK4G/GrHvpHAOk8E5\nTAbnMJx/UFWvGqbhpBw57APOHFhf22q/pqq2AduO9Zclubeqpo91P+PkHCaDc5gMzmHpTcpnK90D\nrE9yVpKTgcuAnWMekySdsCbiyKGqDiX5ELCL/q2s26vqwTEPS5JOWBMRDgBVdRdw13H6dcd8amoC\nOIfJ4Bwmg3NYYhNxQVqSNFkm5ZqDJGmCnFDhkGRjkh8mmU2yddzjWawkZyb5RpKHkjyY5CPjHtOo\nkpyU5LtJvjzusYwqyeoktyf5QZKHk7xp3GNarCT/sv1b+n6SLyR56bjHtJAk25M8leT7A7XTk+xO\n8kj7edo4x7iQeebw79u/pfuTfCnJ6nGO8YQJh4GP6LgYOBt4b5KzxzuqRTsEXFNVZwMbgKtX4BwO\n+wjw8LgHcYw+A3y1ql4LvI4VNp8ka4B/AUxX1Tn0bwa5bLyjGspNwMYjaluBPVW1HtjT1ifZTXTn\nsBs4p6r+IfC/gGuP96AGnTDhwMBHdFTVL4HDH9GxYlTV/qr6Tlv+a/ovRmvGO6rFS7IWeAfw+XGP\nZVRJXgH8HnAjQFX9sqqeGe+oRrIKOCXJKuC3gP8z5vEsqKq+CRw4orwJ2NGWdwCXHtdBLdJcc6iq\nr1XVobZ6N/33e43NiRQOc31Ex4p7YT0syTrgDcC3xjuSkfwJ8FHg78Y9kGNwFvBT4M/a6bHPJzl1\n3INajKraB/wH4CfAfuDZqvraeEc1sqmq2t+WnwCmxjmYJfDPgK+McwAnUji8aCR5GfCXwB9W1XPj\nHs9iJHkn8FRV3TfusRyjVcB5wOeq6g3Az5n8Uxm/pp2X30Q/6P4+cGqSfzreUR276t+CuWJvw0zy\nr+mfQr5lnOM4kcJhqI/omHRJXkI/GG6pqi+OezwjeDPwriSP0T+199Yk/3W8QxrJXmBvVR0+crud\nflisJL8P/KiqflpV/w/4IvCPxjymUT2Z5AyA9vOpMY9nJEmuBN4JXF5jfp/BiRQOK/4jOpKE/jnu\nh6vqj8c9nlFU1bVVtbaq1tF/Dr5eVSvur9WqegJ4PMlrWulCVt5HzP8E2JDkt9q/rQtZYRfVB+wE\nNrflzcAdYxzLSNoXnn0UeFdVPT/u8Zww4dAu9Bz+iI6HgdtW4Ed0vBl4H/2/tr/XHpeMe1AnsA8D\ntyS5H3g98O/GPJ5FaUc9twPfAR6g/3owUe/SnUuSLwD/A3hNkr1JrgKuB96W5BH6R0QT/U2S88zh\nPwO/Dexu/2//l7GO0XdIS5KOdMIcOUiShmc4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKk\njv8PKv7S972dhRMAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "wiki_data[\"delta\"].hist(bins=100)" ] }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 151, "metadata": {}, "outputs": [ { @@ -1352,284 +792,532 @@ " \n", " \n", " \n", - " Grid\n", - " Country\n", - " count\n", - " outliers1\n", - " outliers5\n", - " outliers10\n", + " channel\n", + " countryName\n", + " namespace\n", + " min\n", + " max\n", + " m0\n", + " m1\n", + " m2\n", + " m3\n", + " m4\n", + " m5\n", + " m6\n", + " m7\n", + " m8\n", " \n", " \n", " \n", " \n", " 0\n", - " 1\n", - " 32\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Algeria\n", + " Main\n", + " 0.000000\n", + " 6.666957\n", + " 18.0\n", + " 57.644747\n", + " 234.884785\n", + " 1108.007978\n", + " 5842.083979\n", + " 3.325478e+04\n", + " 1.988641e+05\n", + " 1.226715e+06\n", + " 7.717880e+06\n", " \n", " \n", " 1\n", - " 1\n", - " 33\n", - " 5\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " \n", - " \n", - " 2\n", - " 1\n", - " 39\n", - " 144\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " \n", - " \n", + " #ar.wikipedia\n", + " Algeria\n", + " نقاش\n", + " 5.267858\n", + " 5.267858\n", + " 1.0\n", + " 5.267858\n", + " 27.750330\n", + " 146.184800\n", + " 770.080792\n", + " 4.056676e+03\n", + " 2.137000e+04\n", + " 1.125741e+05\n", + " 5.930244e+05\n", + " \n", + " \n", + " 2\n", + " #ar.wikipedia\n", + " Australia\n", + " Main\n", + " 5.564520\n", + " 5.564520\n", + " 1.0\n", + " 5.564520\n", + " 30.963887\n", + " 172.299183\n", + " 958.762321\n", + " 5.335052e+03\n", + " 2.968701e+04\n", + " 1.651940e+05\n", + " 9.192252e+05\n", + " \n", + " \n", " 3\n", - " 1\n", - " 46\n", - " 14\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Bahrain\n", + " Main\n", + " 3.784190\n", + " 3.784190\n", + " 1.0\n", + " 3.784190\n", + " 14.320091\n", + " 54.189941\n", + " 205.065012\n", + " 7.760049e+02\n", + " 2.936550e+03\n", + " 1.111246e+04\n", + " 4.205166e+04\n", " \n", " \n", " 4\n", - " 1\n", - " 49\n", - " 3\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Belgium\n", + " Main\n", + " 5.459586\n", + " 5.459586\n", + " 1.0\n", + " 5.459586\n", + " 29.807074\n", + " 162.734269\n", + " 888.461660\n", + " 4.850632e+03\n", + " 2.648244e+04\n", + " 1.445832e+05\n", + " 7.893641e+05\n", " \n", " \n", " 5\n", - " 2\n", - " 32\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Denmark\n", + " ويكيبيديا\n", + " 6.030685\n", + " 6.030685\n", + " 1.0\n", + " 6.030685\n", + " 36.369165\n", + " 219.330986\n", + " 1322.716142\n", + " 7.976885e+03\n", + " 4.810608e+04\n", + " 2.901126e+05\n", + " 1.749578e+06\n", " \n", " \n", " 6\n", - " 2\n", - " 33\n", - " 5\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Egypt\n", + " Main\n", + " 0.693147\n", + " 8.664060\n", + " 28.0\n", + " 124.659483\n", + " 680.894125\n", + " 4142.018720\n", + " 26951.803294\n", + " 1.839678e+05\n", + " 1.302402e+06\n", + " 9.492862e+06\n", + " 7.087854e+07\n", " \n", " \n", " 7\n", - " 2\n", - " 39\n", - " 144\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Hashemite Kingdom of Jordan\n", + " Main\n", + " 0.693147\n", + " 5.164786\n", + " 8.0\n", + " 19.517211\n", + " 59.503557\n", + " 216.944287\n", + " 908.031706\n", + " 4.171502e+03\n", + " 2.025181e+04\n", + " 1.013195e+05\n", + " 5.148902e+05\n", " \n", " \n", " 8\n", - " 2\n", - " 46\n", - " 14\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Hashemite Kingdom of Jordan\n", + " نقاش المستخدم\n", + " 4.127134\n", + " 4.127134\n", + " 1.0\n", + " 4.127134\n", + " 17.033238\n", + " 70.298463\n", + " 290.131205\n", + " 1.197410e+03\n", + " 4.941874e+03\n", + " 2.039578e+04\n", + " 8.417612e+04\n", " \n", " \n", " 9\n", - " 2\n", - " 49\n", - " 3\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Iran\n", + " Main\n", + " 0.693147\n", + " 0.693147\n", + " 1.0\n", + " 0.693147\n", + " 0.480453\n", + " 0.333025\n", + " 0.230835\n", + " 1.600027e-01\n", + " 1.109054e-01\n", + " 7.687378e-02\n", + " 5.328484e-02\n", " \n", " \n", " 10\n", - " 3\n", - " 33\n", - " 5\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Iraq\n", + " Main\n", + " 0.000000\n", + " 6.520621\n", + " 19.0\n", + " 61.367666\n", + " 287.348181\n", + " 1524.935621\n", + " 8645.085403\n", + " 5.101748e+04\n", + " 3.091354e+05\n", + " 1.907096e+06\n", + " 1.191112e+07\n", " \n", " \n", " 11\n", - " 3\n", - " 39\n", - " 144\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Iraq\n", + " نقاش\n", + " 5.926926\n", + " 6.278521\n", + " 2.0\n", + " 12.205447\n", + " 74.548283\n", + " 455.701992\n", + " 2787.931246\n", + " 1.707021e+04\n", + " 1.046042e+05\n", + " 6.415184e+05\n", + " 3.937453e+06\n", " \n", " \n", " 12\n", - " 3\n", - " 46\n", - " 10\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Israel\n", + " Main\n", + " 0.000000\n", + " 5.442418\n", + " 4.0\n", + " 13.757495\n", + " 64.504332\n", + " 308.849582\n", + " 1507.522267\n", + " 7.485986e+03\n", + " 3.773624e+04\n", + " 1.926923e+05\n", + " 9.947230e+05\n", " \n", " \n", " 13\n", - " 3\n", - " 49\n", - " 3\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Kuwait\n", + " Main\n", + " 0.693147\n", + " 5.472271\n", + " 9.0\n", + " 27.385355\n", + " 112.181294\n", + " 525.630362\n", + " 2608.214237\n", + " 1.329461e+04\n", + " 6.875724e+04\n", + " 3.588217e+05\n", + " 1.884294e+06\n", " \n", " \n", " 14\n", - " 4\n", - " 32\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " \n", - " \n", - " 15\n", - " 4\n", - " 33\n", - " 5\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " \n", - " \n", - " 16\n", - " 4\n", - " 39\n", - " 144\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Kuwait\n", + " نقاش المستخدم\n", + " 6.272877\n", + " 6.272877\n", + " 1.0\n", + " 6.272877\n", + " 39.348986\n", + " 246.831349\n", + " 1548.342694\n", + " 9.712563e+03\n", + " 6.092571e+04\n", + " 3.821795e+05\n", + " 2.397365e+06\n", + " \n", + " \n", + " 15\n", + " #ar.wikipedia\n", + " Lebanon\n", + " Main\n", + " 5.236442\n", + " 5.996452\n", + " 2.0\n", + " 11.232894\n", + " 63.377762\n", + " 359.201990\n", + " 2044.811514\n", + " 1.169018e+04\n", + " 6.710735e+04\n", + " 3.867371e+05\n", + " 2.237002e+06\n", + " \n", + " \n", + " 16\n", + " #ar.wikipedia\n", + " Libya\n", + " Main\n", + " 4.488636\n", + " 7.472501\n", + " 3.0\n", + " 19.227964\n", + " 128.792904\n", + " 891.425647\n", + " 6312.404206\n", + " 4.538466e+04\n", + " 3.295322e+05\n", + " 2.407739e+06\n", + " 1.766220e+07\n", " \n", " \n", " 17\n", - " 4\n", - " 46\n", - " 14\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Morocco\n", + " Main\n", + " 0.000000\n", + " 7.754053\n", + " 13.0\n", + " 45.491987\n", + " 239.320305\n", + " 1502.900431\n", + " 10204.694809\n", + " 7.174515e+04\n", + " 5.133805e+05\n", + " 3.712545e+06\n", + " 2.704703e+07\n", " \n", " \n", " 18\n", - " 4\n", - " 49\n", - " 3\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Morocco\n", + " بوابة\n", + " 5.521461\n", + " 5.521461\n", + " 1.0\n", + " 5.521461\n", + " 30.486531\n", + " 168.330188\n", + " 929.428552\n", + " 5.131803e+03\n", + " 2.833505e+04\n", + " 1.564509e+05\n", + " 8.638374e+05\n", " \n", " \n", " 19\n", - " 5\n", - " 32\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Morocco\n", + " قالب\n", + " 4.382027\n", + " 4.382027\n", + " 1.0\n", + " 4.382027\n", + " 19.202157\n", + " 84.144365\n", + " 368.722850\n", + " 1.615753e+03\n", + " 7.080274e+03\n", + " 3.102595e+04\n", + " 1.359565e+05\n", " \n", " \n", " 20\n", - " 5\n", - " 33\n", - " 5\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Morocco\n", + " نقاش\n", + " 6.666957\n", + " 6.666957\n", + " 1.0\n", + " 6.666957\n", + " 44.448313\n", + " 296.334981\n", + " 1975.652517\n", + " 1.317159e+04\n", + " 8.781442e+04\n", + " 5.854550e+05\n", + " 3.903203e+06\n", " \n", " \n", " 21\n", - " 5\n", - " 39\n", - " 144\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Morocco\n", + " نقاش المستخدم\n", + " 5.645447\n", + " 5.645447\n", + " 1.0\n", + " 5.645447\n", + " 31.871071\n", + " 179.926437\n", + " 1015.765146\n", + " 5.734448e+03\n", + " 3.237352e+04\n", + " 1.827630e+05\n", + " 1.031779e+06\n", " \n", " \n", " 22\n", - " 5\n", - " 46\n", - " 14\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Morocco\n", + " ويكيبيديا\n", + " 8.711937\n", + " 8.711937\n", + " 1.0\n", + " 8.711937\n", + " 75.897851\n", + " 661.217316\n", + " 5760.483781\n", + " 5.018497e+04\n", + " 4.372083e+05\n", + " 3.808932e+06\n", + " 3.318317e+07\n", " \n", " \n", " 23\n", - " 5\n", - " 49\n", - " 3\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Oman\n", + " Main\n", + " 1.386294\n", + " 3.806662\n", + " 2.0\n", + " 5.192957\n", + " 16.412491\n", + " 57.825323\n", + " 213.673148\n", + " 8.044423e+02\n", + " 3.049848e+03\n", + " 1.159256e+04\n", + " 4.410515e+04\n", " \n", " \n", " 24\n", - " 6\n", - " 33\n", - " 5\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Palestine\n", + " Main\n", + " 0.000000\n", + " 3.465736\n", + " 4.0\n", + " 6.931472\n", + " 24.022651\n", + " 83.256163\n", + " 288.543873\n", + " 1.000017e+03\n", + " 3.465794e+03\n", + " 1.201153e+04\n", + " 4.162878e+04\n", " \n", " \n", " 25\n", - " 6\n", - " 39\n", - " 144\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Qatar\n", + " Main\n", + " 2.197225\n", + " 4.804021\n", + " 3.0\n", + " 9.486152\n", + " 34.081175\n", + " 136.821624\n", + " 594.057905\n", + " 2.704686e+03\n", + " 1.264015e+04\n", + " 5.988422e+04\n", + " 2.856838e+05\n", " \n", " \n", " 26\n", - " 6\n", - " 46\n", - " 10\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Saudi Arabia\n", + " Main\n", + " 0.000000\n", + " 8.311398\n", + " 97.0\n", + " 308.790738\n", + " 1375.600910\n", + " 7038.339584\n", + " 39448.036322\n", + " 2.366367e+05\n", + " 1.497962e+06\n", + " 9.907004e+06\n", + " 6.793387e+07\n", " \n", " \n", " 27\n", - " 6\n", - " 49\n", - " 3\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Saudi Arabia\n", + " مستخدم\n", + " 3.367296\n", + " 3.367296\n", + " 1.0\n", + " 3.367296\n", + " 11.338681\n", + " 38.180694\n", + " 128.565692\n", + " 4.329187e+02\n", + " 1.457765e+03\n", + " 4.908727e+03\n", + " 1.652914e+04\n", " \n", " \n", " 28\n", - " 7\n", - " 33\n", - " 5\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Saudi Arabia\n", + " ويكيبيديا\n", + " 3.610918\n", + " 3.610918\n", + " 1.0\n", + " 3.610918\n", + " 13.038728\n", + " 47.081777\n", + " 170.008432\n", + " 6.138865e+02\n", + " 2.216694e+03\n", + " 8.004299e+03\n", + " 2.890287e+04\n", " \n", " \n", " 29\n", - " 7\n", - " 39\n", - " 144\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " #ar.wikipedia\n", + " Sudan\n", + " Main\n", + " 4.477337\n", + " 4.976734\n", + " 2.0\n", + " 9.454071\n", + " 44.814424\n", + " 213.018272\n", + " 1015.311782\n", + " 4.852247e+03\n", + " 2.324979e+04\n", + " 1.116849e+05\n", + " 5.378129e+05\n", " \n", " \n", " ...\n", @@ -1639,447 +1327,1742 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 132533\n", - " 9997\n", - " 86\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 994\n", + " #zh.wikipedia\n", + " China\n", + " Main\n", + " 1.098612\n", + " 6.689599\n", + " 22.0\n", + " 80.418326\n", + " 342.517685\n", + " 1597.834083\n", + " 7929.112435\n", + " 4.128033e+04\n", + " 2.237338e+05\n", + " 1.255615e+06\n", + " 7.263910e+06\n", " \n", " \n", - " 132534\n", - " 9997\n", - " 88239\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 995\n", + " #zh.wikipedia\n", + " China\n", + " Wikipedia\n", + " 1.609438\n", + " 6.045005\n", + " 4.0\n", + " 19.648868\n", + " 111.067428\n", + " 656.498970\n", + " 3929.637204\n", + " 2.360291e+04\n", + " 1.419026e+05\n", + " 8.533656e+05\n", + " 5.132418e+06\n", " \n", " \n", - " 132535\n", - " 9998\n", - " 33\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 996\n", + " #zh.wikipedia\n", + " Czech Republic\n", + " Main\n", + " 1.098612\n", + " 4.859812\n", + " 3.0\n", + " 9.569343\n", + " 37.863454\n", + " 163.185710\n", + " 729.264530\n", + " 3.326287e+03\n", + " 1.539243e+04\n", + " 7.202931e+04\n", + " 3.400451e+05\n", " \n", " \n", - " 132536\n", - " 9998\n", - " 34\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 997\n", + " #zh.wikipedia\n", + " Finland\n", + " Main\n", + " 3.332205\n", + " 3.332205\n", + " 1.0\n", + " 3.332205\n", + " 11.103587\n", + " 36.999422\n", + " 123.289642\n", + " 4.108263e+02\n", + " 1.368957e+03\n", + " 4.561646e+03\n", + " 1.520034e+04\n", " \n", " \n", - " 132537\n", - " 9998\n", - " 39\n", - " 144\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 998\n", + " #zh.wikipedia\n", + " France\n", + " Main\n", + " 3.663562\n", + " 5.123964\n", + " 2.0\n", + " 8.787526\n", + " 39.676691\n", + " 183.700876\n", + " 869.466985\n", + " 4.192038e+03\n", + " 2.051605e+04\n", + " 1.015925e+05\n", + " 5.076205e+05\n", " \n", " \n", - " 132538\n", - " 9998\n", - " 41\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 999\n", + " #zh.wikipedia\n", + " Germany\n", + " Main\n", + " 3.135494\n", + " 3.135494\n", + " 1.0\n", + " 3.135494\n", + " 9.831324\n", + " 30.826059\n", + " 96.654931\n", + " 3.030610e+02\n", + " 9.502459e+02\n", + " 2.979491e+03\n", + " 9.342176e+03\n", " \n", " \n", - " 132539\n", - " 9998\n", - " 46\n", - " 4\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1000\n", + " #zh.wikipedia\n", + " Hong Kong\n", + " Main\n", + " 0.000000\n", + " 9.431322\n", + " 440.0\n", + " 1541.199742\n", + " 6864.103775\n", + " 34431.390925\n", + " 189571.354539\n", + " 1.133382e+06\n", + " 7.309490e+06\n", + " 5.048145e+07\n", + " 3.699213e+08\n", " \n", " \n", - " 132540\n", - " 9998\n", - " 49\n", - " 6\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1001\n", + " #zh.wikipedia\n", + " Hong Kong\n", + " Template\n", + " 2.079442\n", + " 3.583519\n", + " 2.0\n", + " 5.662960\n", + " 17.165685\n", + " 55.009811\n", + " 183.604539\n", + " 6.298276e+02\n", + " 2.198520e+03\n", + " 7.756832e+03\n", + " 2.754389e+04\n", " \n", " \n", - " 132541\n", - " 9998\n", - " 86\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1002\n", + " #zh.wikipedia\n", + " Israel\n", + " Main\n", + " 6.265301\n", + " 6.265301\n", + " 1.0\n", + " 6.265301\n", + " 39.253999\n", + " 245.938129\n", + " 1540.876460\n", + " 9.654055e+03\n", + " 6.048556e+04\n", + " 3.789603e+05\n", + " 2.374300e+06\n", " \n", " \n", - " 132542\n", - " 9998\n", - " 88239\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1003\n", + " #zh.wikipedia\n", + " Italy\n", + " Main\n", + " 5.247024\n", + " 5.247024\n", + " 1.0\n", + " 5.247024\n", + " 27.531262\n", + " 144.457192\n", + " 757.970366\n", + " 3.977089e+03\n", + " 2.086788e+04\n", + " 1.094943e+05\n", + " 5.745191e+05\n", " \n", " \n", - " 132543\n", - " 9999\n", - " 33\n", - " 4\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1004\n", + " #zh.wikipedia\n", + " Japan\n", + " Main\n", + " 0.000000\n", + " 8.805225\n", + " 12.0\n", + " 51.071233\n", + " 279.236525\n", + " 1743.367154\n", + " 12017.873604\n", + " 8.883793e+04\n", + " 6.890841e+05\n", + " 5.526233e+06\n", + " 4.537615e+07\n", " \n", " \n", - " 132544\n", - " 9999\n", - " 34\n", - " 4\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " \n", - " \n", - " 132545\n", - " 9999\n", - " 39\n", - " 144\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1005\n", + " #zh.wikipedia\n", + " Japan\n", + " Talk\n", + " 3.806662\n", + " 3.806662\n", + " 1.0\n", + " 3.806662\n", + " 14.490679\n", + " 55.161125\n", + " 209.979787\n", + " 7.993222e+02\n", + " 3.042750e+03\n", + " 1.158272e+04\n", + " 4.409151e+04\n", " \n", " \n", - " 132546\n", - " 9999\n", - " 41\n", - " 4\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1006\n", + " #zh.wikipedia\n", + " Macao\n", + " Main\n", + " 1.386294\n", + " 6.295266\n", + " 12.0\n", + " 48.543764\n", + " 228.344849\n", + " 1169.963335\n", + " 6287.631418\n", + " 3.476625e+04\n", + " 1.958443e+05\n", + " 1.118023e+06\n", + " 6.448414e+06\n", " \n", " \n", - " 132547\n", - " 9999\n", - " 43\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1007\n", + " #zh.wikipedia\n", + " Macao\n", + " Wikipedia\n", + " 5.370638\n", + " 5.370638\n", + " 1.0\n", + " 5.370638\n", + " 28.843753\n", + " 154.909356\n", + " 831.962077\n", + " 4.468167e+03\n", + " 2.399691e+04\n", + " 1.288787e+05\n", + " 6.921609e+05\n", " \n", " \n", - " 132548\n", - " 9999\n", - " 44\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1008\n", + " #zh.wikipedia\n", + " Malaysia\n", + " Main\n", + " 0.000000\n", + " 6.109248\n", + " 45.0\n", + " 127.018209\n", + " 459.199656\n", + " 1821.140937\n", + " 7705.150344\n", + " 3.435583e+04\n", + " 1.604540e+05\n", + " 7.817490e+05\n", + " 3.958450e+06\n", " \n", " \n", - " 132549\n", - " 9999\n", - " 46\n", - " 8\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1009\n", + " #zh.wikipedia\n", + " New Zealand\n", + " Main\n", + " 2.484907\n", + " 3.951244\n", + " 2.0\n", + " 6.436150\n", + " 21.787088\n", + " 77.031814\n", + " 281.872426\n", + " 1.057839e+03\n", + " 4.040852e+03\n", + " 1.562117e+04\n", + " 6.086522e+04\n", " \n", " \n", - " 132550\n", - " 9999\n", - " 49\n", - " 9\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1010\n", + " #zh.wikipedia\n", + " Portugal\n", + " Main\n", + " 2.484907\n", + " 5.187386\n", + " 2.0\n", + " 7.672292\n", + " 33.083733\n", + " 154.930922\n", + " 762.220421\n", + " 3.850892e+03\n", + " 1.972002e+04\n", + " 1.016591e+05\n", + " 5.257640e+05\n", " \n", " \n", - " 132551\n", - " 9999\n", - " 86\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1011\n", + " #zh.wikipedia\n", + " Republic of Korea\n", + " Main\n", + " 3.091042\n", + " 3.091042\n", + " 1.0\n", + " 3.091042\n", + " 9.554543\n", + " 29.533499\n", + " 91.289301\n", + " 2.821791e+02\n", + " 8.722276e+02\n", + " 2.696093e+03\n", + " 8.333736e+03\n", " \n", " \n", - " 132552\n", - " 9999\n", - " 88239\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1012\n", + " #zh.wikipedia\n", + " Singapore\n", + " Main\n", + " 1.386294\n", + " 4.983607\n", + " 12.0\n", + " 32.615067\n", + " 106.962952\n", + " 399.085420\n", + " 1608.995005\n", + " 6.796709e+03\n", + " 2.960535e+04\n", + " 1.318929e+05\n", + " 5.982321e+05\n", " \n", " \n", - " 132553\n", - " 10000\n", - " 33\n", - " 4\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1013\n", + " #zh.wikipedia\n", + " Taiwan\n", + " File\n", + " 0.693147\n", + " 5.398163\n", + " 3.0\n", + " 7.189922\n", + " 30.827563\n", + " 158.962321\n", + " 850.836518\n", + " 4.585605e+03\n", + " 2.474621e+04\n", + " 1.335760e+05\n", + " 7.210561e+05\n", " \n", " \n", - " 132554\n", - " 10000\n", - " 34\n", - " 4\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1014\n", + " #zh.wikipedia\n", + " Taiwan\n", + " Main\n", + " 0.000000\n", + " 8.348775\n", + " 659.0\n", + " 2209.913454\n", + " 9248.880591\n", + " 43555.882096\n", + " 222371.087542\n", + " 1.205020e+06\n", + " 6.842703e+06\n", + " 4.039094e+07\n", + " 2.465605e+08\n", " \n", " \n", - " 132555\n", - " 10000\n", - " 39\n", - " 144\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1015\n", + " #zh.wikipedia\n", + " Taiwan\n", + " Talk\n", + " 2.564949\n", + " 6.984716\n", + " 8.0\n", + " 33.299343\n", + " 158.796546\n", + " 848.436567\n", + " 4912.187478\n", + " 2.994611e+04\n", + " 1.885966e+05\n", + " 1.213369e+06\n", + " 7.924473e+06\n", " \n", " \n", - " 132556\n", - " 10000\n", - " 41\n", - " 4\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1016\n", + " #zh.wikipedia\n", + " Taiwan\n", + " Template\n", + " 2.302585\n", + " 4.127134\n", + " 2.0\n", + " 6.429719\n", + " 22.335136\n", + " 82.506535\n", + " 318.241328\n", + " 1.262136e+03\n", + " 5.090911e+03\n", + " 2.073895e+04\n", + " 8.496629e+04\n", " \n", " \n", - " 132557\n", - " 10000\n", - " 43\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1017\n", + " #zh.wikipedia\n", + " Taiwan\n", + " User\n", + " 3.555348\n", + " 3.555348\n", + " 1.0\n", + " 3.555348\n", + " 12.640500\n", + " 44.941377\n", + " 159.782236\n", + " 5.680815e+02\n", + " 2.019727e+03\n", + " 7.180834e+03\n", + " 2.553036e+04\n", " \n", " \n", - " 132558\n", - " 10000\n", - " 44\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1018\n", + " #zh.wikipedia\n", + " Taiwan\n", + " User talk\n", + " 6.028279\n", + " 6.028279\n", + " 1.0\n", + " 6.028279\n", + " 36.340142\n", + " 219.068497\n", + " 1320.605915\n", + " 7.960980e+03\n", + " 4.799101e+04\n", + " 2.893032e+05\n", + " 1.744000e+06\n", + " \n", + " \n", + " 1019\n", + " #zh.wikipedia\n", + " Taiwan\n", + " Wikipedia\n", + " 5.843544\n", + " 7.059618\n", + " 7.0\n", + " 45.332490\n", + " 294.732410\n", + " 1923.808607\n", + " 12607.044189\n", + " 8.294237e+04\n", + " 5.478200e+05\n", + " 3.632255e+06\n", + " 2.417480e+07\n", + " \n", + " \n", + " 1020\n", + " #zh.wikipedia\n", + " United Kingdom\n", + " Main\n", + " 2.564949\n", + " 2.772589\n", + " 2.0\n", + " 5.337538\n", + " 14.266213\n", + " 38.188290\n", + " 102.376568\n", + " 2.748609e+02\n", + " 7.390245e+02\n", + " 1.989885e+03\n", + " 5.365475e+03\n", + " \n", + " \n", + " 1021\n", + " #zh.wikipedia\n", + " United States\n", + " Main\n", + " 0.000000\n", + " 7.198931\n", + " 39.0\n", + " 129.906500\n", + " 595.110290\n", + " 3138.035261\n", + " 17972.964491\n", + " 1.084415e+05\n", + " 6.770740e+05\n", + " 4.328078e+06\n", + " 2.814110e+07\n", + " \n", + " \n", + " 1022\n", + " #zh.wikipedia\n", + " United States\n", + " Template\n", + " 2.995732\n", + " 2.995732\n", + " 1.0\n", + " 2.995732\n", + " 8.974412\n", + " 26.884935\n", + " 80.540068\n", + " 2.412765e+02\n", + " 7.227997e+02\n", + " 2.165315e+03\n", + " 6.486703e+03\n", + " \n", + " \n", + " 1023\n", + " #zh.wikipedia\n", + " Vietnam\n", + " Main\n", + " 2.079442\n", + " 5.283204\n", + " 5.0\n", + " 18.003488\n", + " 73.970915\n", + " 332.114026\n", + " 1572.094310\n", + " 7.672361e+03\n", + " 3.813046e+04\n", + " 1.916971e+05\n", + " 9.712955e+05\n", + " \n", + " \n", + "\n", + "

1024 rows × 14 columns

\n", + "" + ], + "text/plain": [ + " channel countryName namespace min \\\n", + "0 #ar.wikipedia Algeria Main 0.000000 \n", + "1 #ar.wikipedia Algeria نقاش 5.267858 \n", + "2 #ar.wikipedia Australia Main 5.564520 \n", + "3 #ar.wikipedia Bahrain Main 3.784190 \n", + "4 #ar.wikipedia Belgium Main 5.459586 \n", + "5 #ar.wikipedia Denmark ويكيبيديا 6.030685 \n", + "6 #ar.wikipedia Egypt Main 0.693147 \n", + "7 #ar.wikipedia Hashemite Kingdom of Jordan Main 0.693147 \n", + "8 #ar.wikipedia Hashemite Kingdom of Jordan نقاش المستخدم 4.127134 \n", + "9 #ar.wikipedia Iran Main 0.693147 \n", + "10 #ar.wikipedia Iraq Main 0.000000 \n", + "11 #ar.wikipedia Iraq نقاش 5.926926 \n", + "12 #ar.wikipedia Israel Main 0.000000 \n", + "13 #ar.wikipedia Kuwait Main 0.693147 \n", + "14 #ar.wikipedia Kuwait نقاش المستخدم 6.272877 \n", + "15 #ar.wikipedia Lebanon Main 5.236442 \n", + "16 #ar.wikipedia Libya Main 4.488636 \n", + "17 #ar.wikipedia Morocco Main 0.000000 \n", + "18 #ar.wikipedia Morocco بوابة 5.521461 \n", + "19 #ar.wikipedia Morocco قالب 4.382027 \n", + "20 #ar.wikipedia Morocco نقاش 6.666957 \n", + "21 #ar.wikipedia Morocco نقاش المستخدم 5.645447 \n", + "22 #ar.wikipedia Morocco ويكيبيديا 8.711937 \n", + "23 #ar.wikipedia Oman Main 1.386294 \n", + "24 #ar.wikipedia Palestine Main 0.000000 \n", + "25 #ar.wikipedia Qatar Main 2.197225 \n", + "26 #ar.wikipedia Saudi Arabia Main 0.000000 \n", + "27 #ar.wikipedia Saudi Arabia مستخدم 3.367296 \n", + "28 #ar.wikipedia Saudi Arabia ويكيبيديا 3.610918 \n", + "29 #ar.wikipedia Sudan Main 4.477337 \n", + "... ... ... ... ... \n", + "994 #zh.wikipedia China Main 1.098612 \n", + "995 #zh.wikipedia China Wikipedia 1.609438 \n", + "996 #zh.wikipedia Czech Republic Main 1.098612 \n", + "997 #zh.wikipedia Finland Main 3.332205 \n", + "998 #zh.wikipedia France Main 3.663562 \n", + "999 #zh.wikipedia Germany Main 3.135494 \n", + "1000 #zh.wikipedia Hong Kong Main 0.000000 \n", + "1001 #zh.wikipedia Hong Kong Template 2.079442 \n", + "1002 #zh.wikipedia Israel Main 6.265301 \n", + "1003 #zh.wikipedia Italy Main 5.247024 \n", + "1004 #zh.wikipedia Japan Main 0.000000 \n", + "1005 #zh.wikipedia Japan Talk 3.806662 \n", + "1006 #zh.wikipedia Macao Main 1.386294 \n", + "1007 #zh.wikipedia Macao Wikipedia 5.370638 \n", + "1008 #zh.wikipedia Malaysia Main 0.000000 \n", + "1009 #zh.wikipedia New Zealand Main 2.484907 \n", + "1010 #zh.wikipedia Portugal Main 2.484907 \n", + "1011 #zh.wikipedia Republic of Korea Main 3.091042 \n", + "1012 #zh.wikipedia Singapore Main 1.386294 \n", + "1013 #zh.wikipedia Taiwan File 0.693147 \n", + "1014 #zh.wikipedia Taiwan Main 0.000000 \n", + "1015 #zh.wikipedia Taiwan Talk 2.564949 \n", + "1016 #zh.wikipedia Taiwan Template 2.302585 \n", + "1017 #zh.wikipedia Taiwan User 3.555348 \n", + "1018 #zh.wikipedia Taiwan User talk 6.028279 \n", + "1019 #zh.wikipedia Taiwan Wikipedia 5.843544 \n", + "1020 #zh.wikipedia United Kingdom Main 2.564949 \n", + "1021 #zh.wikipedia United States Main 0.000000 \n", + "1022 #zh.wikipedia United States Template 2.995732 \n", + "1023 #zh.wikipedia Vietnam Main 2.079442 \n", + "\n", + " max m0 m1 m2 m3 m4 \\\n", + "0 6.666957 18.0 57.644747 234.884785 1108.007978 5842.083979 \n", + "1 5.267858 1.0 5.267858 27.750330 146.184800 770.080792 \n", + "2 5.564520 1.0 5.564520 30.963887 172.299183 958.762321 \n", + "3 3.784190 1.0 3.784190 14.320091 54.189941 205.065012 \n", + "4 5.459586 1.0 5.459586 29.807074 162.734269 888.461660 \n", + "5 6.030685 1.0 6.030685 36.369165 219.330986 1322.716142 \n", + "6 8.664060 28.0 124.659483 680.894125 4142.018720 26951.803294 \n", + "7 5.164786 8.0 19.517211 59.503557 216.944287 908.031706 \n", + "8 4.127134 1.0 4.127134 17.033238 70.298463 290.131205 \n", + "9 0.693147 1.0 0.693147 0.480453 0.333025 0.230835 \n", + "10 6.520621 19.0 61.367666 287.348181 1524.935621 8645.085403 \n", + "11 6.278521 2.0 12.205447 74.548283 455.701992 2787.931246 \n", + "12 5.442418 4.0 13.757495 64.504332 308.849582 1507.522267 \n", + "13 5.472271 9.0 27.385355 112.181294 525.630362 2608.214237 \n", + "14 6.272877 1.0 6.272877 39.348986 246.831349 1548.342694 \n", + "15 5.996452 2.0 11.232894 63.377762 359.201990 2044.811514 \n", + "16 7.472501 3.0 19.227964 128.792904 891.425647 6312.404206 \n", + "17 7.754053 13.0 45.491987 239.320305 1502.900431 10204.694809 \n", + "18 5.521461 1.0 5.521461 30.486531 168.330188 929.428552 \n", + "19 4.382027 1.0 4.382027 19.202157 84.144365 368.722850 \n", + "20 6.666957 1.0 6.666957 44.448313 296.334981 1975.652517 \n", + "21 5.645447 1.0 5.645447 31.871071 179.926437 1015.765146 \n", + "22 8.711937 1.0 8.711937 75.897851 661.217316 5760.483781 \n", + "23 3.806662 2.0 5.192957 16.412491 57.825323 213.673148 \n", + "24 3.465736 4.0 6.931472 24.022651 83.256163 288.543873 \n", + "25 4.804021 3.0 9.486152 34.081175 136.821624 594.057905 \n", + "26 8.311398 97.0 308.790738 1375.600910 7038.339584 39448.036322 \n", + "27 3.367296 1.0 3.367296 11.338681 38.180694 128.565692 \n", + "28 3.610918 1.0 3.610918 13.038728 47.081777 170.008432 \n", + "29 4.976734 2.0 9.454071 44.814424 213.018272 1015.311782 \n", + "... ... ... ... ... ... ... \n", + "994 6.689599 22.0 80.418326 342.517685 1597.834083 7929.112435 \n", + "995 6.045005 4.0 19.648868 111.067428 656.498970 3929.637204 \n", + "996 4.859812 3.0 9.569343 37.863454 163.185710 729.264530 \n", + "997 3.332205 1.0 3.332205 11.103587 36.999422 123.289642 \n", + "998 5.123964 2.0 8.787526 39.676691 183.700876 869.466985 \n", + "999 3.135494 1.0 3.135494 9.831324 30.826059 96.654931 \n", + "1000 9.431322 440.0 1541.199742 6864.103775 34431.390925 189571.354539 \n", + "1001 3.583519 2.0 5.662960 17.165685 55.009811 183.604539 \n", + "1002 6.265301 1.0 6.265301 39.253999 245.938129 1540.876460 \n", + "1003 5.247024 1.0 5.247024 27.531262 144.457192 757.970366 \n", + "1004 8.805225 12.0 51.071233 279.236525 1743.367154 12017.873604 \n", + "1005 3.806662 1.0 3.806662 14.490679 55.161125 209.979787 \n", + "1006 6.295266 12.0 48.543764 228.344849 1169.963335 6287.631418 \n", + "1007 5.370638 1.0 5.370638 28.843753 154.909356 831.962077 \n", + "1008 6.109248 45.0 127.018209 459.199656 1821.140937 7705.150344 \n", + "1009 3.951244 2.0 6.436150 21.787088 77.031814 281.872426 \n", + "1010 5.187386 2.0 7.672292 33.083733 154.930922 762.220421 \n", + "1011 3.091042 1.0 3.091042 9.554543 29.533499 91.289301 \n", + "1012 4.983607 12.0 32.615067 106.962952 399.085420 1608.995005 \n", + "1013 5.398163 3.0 7.189922 30.827563 158.962321 850.836518 \n", + "1014 8.348775 659.0 2209.913454 9248.880591 43555.882096 222371.087542 \n", + "1015 6.984716 8.0 33.299343 158.796546 848.436567 4912.187478 \n", + "1016 4.127134 2.0 6.429719 22.335136 82.506535 318.241328 \n", + "1017 3.555348 1.0 3.555348 12.640500 44.941377 159.782236 \n", + "1018 6.028279 1.0 6.028279 36.340142 219.068497 1320.605915 \n", + "1019 7.059618 7.0 45.332490 294.732410 1923.808607 12607.044189 \n", + "1020 2.772589 2.0 5.337538 14.266213 38.188290 102.376568 \n", + "1021 7.198931 39.0 129.906500 595.110290 3138.035261 17972.964491 \n", + "1022 2.995732 1.0 2.995732 8.974412 26.884935 80.540068 \n", + "1023 5.283204 5.0 18.003488 73.970915 332.114026 1572.094310 \n", + "\n", + " m5 m6 m7 m8 \n", + "0 3.325478e+04 1.988641e+05 1.226715e+06 7.717880e+06 \n", + "1 4.056676e+03 2.137000e+04 1.125741e+05 5.930244e+05 \n", + "2 5.335052e+03 2.968701e+04 1.651940e+05 9.192252e+05 \n", + "3 7.760049e+02 2.936550e+03 1.111246e+04 4.205166e+04 \n", + "4 4.850632e+03 2.648244e+04 1.445832e+05 7.893641e+05 \n", + "5 7.976885e+03 4.810608e+04 2.901126e+05 1.749578e+06 \n", + "6 1.839678e+05 1.302402e+06 9.492862e+06 7.087854e+07 \n", + "7 4.171502e+03 2.025181e+04 1.013195e+05 5.148902e+05 \n", + "8 1.197410e+03 4.941874e+03 2.039578e+04 8.417612e+04 \n", + "9 1.600027e-01 1.109054e-01 7.687378e-02 5.328484e-02 \n", + "10 5.101748e+04 3.091354e+05 1.907096e+06 1.191112e+07 \n", + "11 1.707021e+04 1.046042e+05 6.415184e+05 3.937453e+06 \n", + "12 7.485986e+03 3.773624e+04 1.926923e+05 9.947230e+05 \n", + "13 1.329461e+04 6.875724e+04 3.588217e+05 1.884294e+06 \n", + "14 9.712563e+03 6.092571e+04 3.821795e+05 2.397365e+06 \n", + "15 1.169018e+04 6.710735e+04 3.867371e+05 2.237002e+06 \n", + "16 4.538466e+04 3.295322e+05 2.407739e+06 1.766220e+07 \n", + "17 7.174515e+04 5.133805e+05 3.712545e+06 2.704703e+07 \n", + "18 5.131803e+03 2.833505e+04 1.564509e+05 8.638374e+05 \n", + "19 1.615753e+03 7.080274e+03 3.102595e+04 1.359565e+05 \n", + "20 1.317159e+04 8.781442e+04 5.854550e+05 3.903203e+06 \n", + "21 5.734448e+03 3.237352e+04 1.827630e+05 1.031779e+06 \n", + "22 5.018497e+04 4.372083e+05 3.808932e+06 3.318317e+07 \n", + "23 8.044423e+02 3.049848e+03 1.159256e+04 4.410515e+04 \n", + "24 1.000017e+03 3.465794e+03 1.201153e+04 4.162878e+04 \n", + "25 2.704686e+03 1.264015e+04 5.988422e+04 2.856838e+05 \n", + "26 2.366367e+05 1.497962e+06 9.907004e+06 6.793387e+07 \n", + "27 4.329187e+02 1.457765e+03 4.908727e+03 1.652914e+04 \n", + "28 6.138865e+02 2.216694e+03 8.004299e+03 2.890287e+04 \n", + "29 4.852247e+03 2.324979e+04 1.116849e+05 5.378129e+05 \n", + "... ... ... ... ... \n", + "994 4.128033e+04 2.237338e+05 1.255615e+06 7.263910e+06 \n", + "995 2.360291e+04 1.419026e+05 8.533656e+05 5.132418e+06 \n", + "996 3.326287e+03 1.539243e+04 7.202931e+04 3.400451e+05 \n", + "997 4.108263e+02 1.368957e+03 4.561646e+03 1.520034e+04 \n", + "998 4.192038e+03 2.051605e+04 1.015925e+05 5.076205e+05 \n", + "999 3.030610e+02 9.502459e+02 2.979491e+03 9.342176e+03 \n", + "1000 1.133382e+06 7.309490e+06 5.048145e+07 3.699213e+08 \n", + "1001 6.298276e+02 2.198520e+03 7.756832e+03 2.754389e+04 \n", + "1002 9.654055e+03 6.048556e+04 3.789603e+05 2.374300e+06 \n", + "1003 3.977089e+03 2.086788e+04 1.094943e+05 5.745191e+05 \n", + "1004 8.883793e+04 6.890841e+05 5.526233e+06 4.537615e+07 \n", + "1005 7.993222e+02 3.042750e+03 1.158272e+04 4.409151e+04 \n", + "1006 3.476625e+04 1.958443e+05 1.118023e+06 6.448414e+06 \n", + "1007 4.468167e+03 2.399691e+04 1.288787e+05 6.921609e+05 \n", + "1008 3.435583e+04 1.604540e+05 7.817490e+05 3.958450e+06 \n", + "1009 1.057839e+03 4.040852e+03 1.562117e+04 6.086522e+04 \n", + "1010 3.850892e+03 1.972002e+04 1.016591e+05 5.257640e+05 \n", + "1011 2.821791e+02 8.722276e+02 2.696093e+03 8.333736e+03 \n", + "1012 6.796709e+03 2.960535e+04 1.318929e+05 5.982321e+05 \n", + "1013 4.585605e+03 2.474621e+04 1.335760e+05 7.210561e+05 \n", + "1014 1.205020e+06 6.842703e+06 4.039094e+07 2.465605e+08 \n", + "1015 2.994611e+04 1.885966e+05 1.213369e+06 7.924473e+06 \n", + "1016 1.262136e+03 5.090911e+03 2.073895e+04 8.496629e+04 \n", + "1017 5.680815e+02 2.019727e+03 7.180834e+03 2.553036e+04 \n", + "1018 7.960980e+03 4.799101e+04 2.893032e+05 1.744000e+06 \n", + "1019 8.294237e+04 5.478200e+05 3.632255e+06 2.417480e+07 \n", + "1020 2.748609e+02 7.390245e+02 1.989885e+03 5.365475e+03 \n", + "1021 1.084415e+05 6.770740e+05 4.328078e+06 2.814110e+07 \n", + "1022 2.412765e+02 7.227997e+02 2.165315e+03 6.486703e+03 \n", + "1023 7.672361e+03 3.813046e+04 1.916971e+05 9.712955e+05 \n", + "\n", + "[1024 rows x 14 columns]" + ] + }, + "execution_count": 151, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wiki_cube = wiki_data.groupby(attributes).agg({metric: [\n", + " 'min',\n", + " 'max',\n", + " moment(0),\n", + " moment(1),\n", + " moment(2),\n", + " moment(3),\n", + " moment(4),\n", + " moment(5),\n", + " moment(6),\n", + " moment(7),\n", + " moment(8)\n", + "]}).reset_index(col_level=1)\n", + "wiki_cube.columns = wiki_cube.columns.get_level_values(1)\n", + "wiki_cube" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "wiki_cube.to_csv('lib/src/test/resources/wiki_moments_cubed.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", "
channelcountryNamenamespacecountoutliers1outliers5outliers10
0#ar.wikipediaAlgeriaMain180.00.02.0
1#ar.wikipediaAlgeriaنقاش10.00.00.0
2#ar.wikipediaAustraliaMain10.00.00.0
3#ar.wikipediaBahrainMain10.00.00.0
4#ar.wikipediaBelgiumMain10.00.00.0
5#ar.wikipediaDenmarkويكيبيديا10.00.01.0
6#ar.wikipediaEgyptMain281.04.08.0
7#ar.wikipediaHashemite Kingdom of JordanMain80.00.00.0
8#ar.wikipediaHashemite Kingdom of Jordanنقاش المستخدم10.00.00.0
9#ar.wikipediaIranMain10.00.00.0
10#ar.wikipediaIraqMain190.00.04.0
11#ar.wikipediaIraqنقاش20.00.01.0
12#ar.wikipediaIsraelMain40.00.00.0
13#ar.wikipediaKuwaitMain90.00.00.0
14#ar.wikipediaKuwaitنقاش المستخدم10.00.01.0
15#ar.wikipediaLebanonMain20.00.00.0
16#ar.wikipediaLibyaMain30.02.02.0
17#ar.wikipediaMoroccoMain130.02.04.0
18#ar.wikipediaMoroccoبوابة10.00.00.0
19#ar.wikipediaMoroccoقالب10.00.00.0
20#ar.wikipediaMoroccoنقاش10.00.01.0
21#ar.wikipediaMoroccoنقاش المستخدم10.00.00.0
22#ar.wikipediaMoroccoويكيبيديا11.01.01.0
23#ar.wikipediaOmanMain20.00.00.0
24#ar.wikipediaPalestineMain40.00.00.0
25#ar.wikipediaQatarMain30.00.00.0
26#ar.wikipediaSaudi ArabiaMain970.03.08.0
27#ar.wikipediaSaudi Arabiaمستخدم10.00.00.0
28#ar.wikipediaSaudi Arabiaويكيبيديا10.00.00.0
29#ar.wikipediaSudanMain20.00.00.0
........................
994#zh.wikipediaChinaMain220.00.01.0
995#zh.wikipediaChinaWikipedia40.00.02.0
996#zh.wikipediaCzech RepublicMain30.00.00.0
997#zh.wikipediaFinlandMain10.00.00.0
998#zh.wikipediaFranceMain20.00.00.0
999#zh.wikipediaGermanyMain10.00.00.0
1000#zh.wikipediaHong KongMain4402.014.022.0
1001#zh.wikipediaHong KongTemplate20.00.00.0
1002#zh.wikipediaIsraelMain10.00.01.0
1003#zh.wikipediaItalyMain10.00.00.0
1004#zh.wikipediaJapanMain121.02.03.0
1005#zh.wikipediaJapanTalk10.00.00.0
1006#zh.wikipediaMacaoMain120.00.01.0
1007#zh.wikipediaMacaoWikipedia10.00.00.0
1008#zh.wikipediaMalaysiaMain450.00.01.0
1009#zh.wikipediaNew ZealandMain20.00.00.0
1010#zh.wikipediaPortugalMain20.00.00.0
1011#zh.wikipediaRepublic of KoreaMain10.00.00.0
1012#zh.wikipediaSingaporeMain120.00.00.0
1013#zh.wikipediaTaiwanFile30.00.00.0
1014#zh.wikipediaTaiwanMain6590.06.039.0
13255910000461015#zh.wikipediaTaiwanTalk80.01.01.0
1016#zh.wikipediaTaiwanTemplate20.00.00.0
132560100004991017#zh.wikipediaTaiwanUser10.00.00.0
13256110000861018#zh.wikipediaTaiwanUser talk10.00.01.0
1019#zh.wikipediaTaiwanWikipedia70.02.06.0
13256210000882391020#zh.wikipediaUnited KingdomMain20.00.00.0
1021#zh.wikipediaUnited StatesMain390.01.05.0
1022#zh.wikipediaUnited StatesTemplate10.00.00.0
1023#zh.wikipediaVietnamMain50.00.00.0
\n", - "

132563 rows × 6 columns

\n", + "

1024 rows × 7 columns

\n", "
" ], "text/plain": [ - " Grid Country count outliers1 outliers5 outliers10\n", - "0 1 32 1 0.0 0.0 0.0\n", - "1 1 33 5 0.0 0.0 0.0\n", - "2 1 39 144 0.0 0.0 0.0\n", - "3 1 46 14 0.0 0.0 0.0\n", - "4 1 49 3 0.0 0.0 0.0\n", - "5 2 32 1 0.0 0.0 0.0\n", - "6 2 33 5 0.0 0.0 0.0\n", - "7 2 39 144 0.0 0.0 0.0\n", - "8 2 46 14 0.0 0.0 0.0\n", - "9 2 49 3 0.0 0.0 0.0\n", - "10 3 33 5 0.0 0.0 0.0\n", - "11 3 39 144 0.0 0.0 0.0\n", - "12 3 46 10 0.0 0.0 0.0\n", - "13 3 49 3 0.0 0.0 0.0\n", - "14 4 32 1 0.0 0.0 0.0\n", - "15 4 33 5 0.0 0.0 0.0\n", - "16 4 39 144 0.0 0.0 0.0\n", - "17 4 46 14 0.0 0.0 0.0\n", - "18 4 49 3 0.0 0.0 0.0\n", - "19 5 32 1 0.0 0.0 0.0\n", - "20 5 33 5 0.0 0.0 0.0\n", - "21 5 39 144 0.0 0.0 0.0\n", - "22 5 46 14 0.0 0.0 0.0\n", - "23 5 49 3 0.0 0.0 0.0\n", - "24 6 33 5 0.0 0.0 0.0\n", - "25 6 39 144 0.0 0.0 0.0\n", - "26 6 46 10 0.0 0.0 0.0\n", - "27 6 49 3 0.0 0.0 0.0\n", - "28 7 33 5 0.0 0.0 0.0\n", - "29 7 39 144 0.0 0.0 0.0\n", - "... ... ... ... ... ... ...\n", - "132533 9997 86 1 0.0 0.0 0.0\n", - "132534 9997 88239 2 0.0 0.0 0.0\n", - "132535 9998 33 1 0.0 0.0 0.0\n", - "132536 9998 34 2 0.0 0.0 0.0\n", - "132537 9998 39 144 0.0 0.0 0.0\n", - "132538 9998 41 1 0.0 0.0 0.0\n", - "132539 9998 46 4 0.0 0.0 0.0\n", - "132540 9998 49 6 0.0 0.0 0.0\n", - "132541 9998 86 1 0.0 0.0 0.0\n", - "132542 9998 88239 2 0.0 0.0 0.0\n", - "132543 9999 33 4 0.0 0.0 0.0\n", - "132544 9999 34 4 0.0 0.0 0.0\n", - "132545 9999 39 144 0.0 0.0 0.0\n", - "132546 9999 41 4 0.0 0.0 0.0\n", - "132547 9999 43 1 0.0 0.0 0.0\n", - "132548 9999 44 1 0.0 0.0 0.0\n", - "132549 9999 46 8 0.0 0.0 0.0\n", - "132550 9999 49 9 0.0 0.0 0.0\n", - "132551 9999 86 1 0.0 0.0 0.0\n", - "132552 9999 88239 2 0.0 0.0 0.0\n", - "132553 10000 33 4 0.0 0.0 0.0\n", - "132554 10000 34 4 0.0 0.0 0.0\n", - "132555 10000 39 144 0.0 0.0 0.0\n", - "132556 10000 41 4 0.0 0.0 0.0\n", - "132557 10000 43 1 0.0 0.0 0.0\n", - "132558 10000 44 1 0.0 0.0 0.0\n", - "132559 10000 46 8 0.0 0.0 0.0\n", - "132560 10000 49 9 0.0 0.0 0.0\n", - "132561 10000 86 1 0.0 0.0 0.0\n", - "132562 10000 88239 2 0.0 0.0 0.0\n", + " channel countryName namespace count \\\n", + "0 #ar.wikipedia Algeria Main 18 \n", + "1 #ar.wikipedia Algeria نقاش 1 \n", + "2 #ar.wikipedia Australia Main 1 \n", + "3 #ar.wikipedia Bahrain Main 1 \n", + "4 #ar.wikipedia Belgium Main 1 \n", + "5 #ar.wikipedia Denmark ويكيبيديا 1 \n", + "6 #ar.wikipedia Egypt Main 28 \n", + "7 #ar.wikipedia Hashemite Kingdom of Jordan Main 8 \n", + "8 #ar.wikipedia Hashemite Kingdom of Jordan نقاش المستخدم 1 \n", + "9 #ar.wikipedia Iran Main 1 \n", + "10 #ar.wikipedia Iraq Main 19 \n", + "11 #ar.wikipedia Iraq نقاش 2 \n", + "12 #ar.wikipedia Israel Main 4 \n", + "13 #ar.wikipedia Kuwait Main 9 \n", + "14 #ar.wikipedia Kuwait نقاش المستخدم 1 \n", + "15 #ar.wikipedia Lebanon Main 2 \n", + "16 #ar.wikipedia Libya Main 3 \n", + "17 #ar.wikipedia Morocco Main 13 \n", + "18 #ar.wikipedia Morocco بوابة 1 \n", + "19 #ar.wikipedia Morocco قالب 1 \n", + "20 #ar.wikipedia Morocco نقاش 1 \n", + "21 #ar.wikipedia Morocco نقاش المستخدم 1 \n", + "22 #ar.wikipedia Morocco ويكيبيديا 1 \n", + "23 #ar.wikipedia Oman Main 2 \n", + "24 #ar.wikipedia Palestine Main 4 \n", + "25 #ar.wikipedia Qatar Main 3 \n", + "26 #ar.wikipedia Saudi Arabia Main 97 \n", + "27 #ar.wikipedia Saudi Arabia مستخدم 1 \n", + "28 #ar.wikipedia Saudi Arabia ويكيبيديا 1 \n", + "29 #ar.wikipedia Sudan Main 2 \n", + "... ... ... ... ... \n", + "994 #zh.wikipedia China Main 22 \n", + "995 #zh.wikipedia China Wikipedia 4 \n", + "996 #zh.wikipedia Czech Republic Main 3 \n", + "997 #zh.wikipedia Finland Main 1 \n", + "998 #zh.wikipedia France Main 2 \n", + "999 #zh.wikipedia Germany Main 1 \n", + "1000 #zh.wikipedia Hong Kong Main 440 \n", + "1001 #zh.wikipedia Hong Kong Template 2 \n", + "1002 #zh.wikipedia Israel Main 1 \n", + "1003 #zh.wikipedia Italy Main 1 \n", + "1004 #zh.wikipedia Japan Main 12 \n", + "1005 #zh.wikipedia Japan Talk 1 \n", + "1006 #zh.wikipedia Macao Main 12 \n", + "1007 #zh.wikipedia Macao Wikipedia 1 \n", + "1008 #zh.wikipedia Malaysia Main 45 \n", + "1009 #zh.wikipedia New Zealand Main 2 \n", + "1010 #zh.wikipedia Portugal Main 2 \n", + "1011 #zh.wikipedia Republic of Korea Main 1 \n", + "1012 #zh.wikipedia Singapore Main 12 \n", + "1013 #zh.wikipedia Taiwan File 3 \n", + "1014 #zh.wikipedia Taiwan Main 659 \n", + "1015 #zh.wikipedia Taiwan Talk 8 \n", + "1016 #zh.wikipedia Taiwan Template 2 \n", + "1017 #zh.wikipedia Taiwan User 1 \n", + "1018 #zh.wikipedia Taiwan User talk 1 \n", + "1019 #zh.wikipedia Taiwan Wikipedia 7 \n", + "1020 #zh.wikipedia United Kingdom Main 2 \n", + "1021 #zh.wikipedia United States Main 39 \n", + "1022 #zh.wikipedia United States Template 1 \n", + "1023 #zh.wikipedia Vietnam Main 5 \n", + "\n", + " outliers1 outliers5 outliers10 \n", + "0 0.0 0.0 2.0 \n", + "1 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "5 0.0 0.0 1.0 \n", + "6 1.0 4.0 8.0 \n", + "7 0.0 0.0 0.0 \n", + "8 0.0 0.0 0.0 \n", + "9 0.0 0.0 0.0 \n", + "10 0.0 0.0 4.0 \n", + "11 0.0 0.0 1.0 \n", + "12 0.0 0.0 0.0 \n", + "13 0.0 0.0 0.0 \n", + "14 0.0 0.0 1.0 \n", + "15 0.0 0.0 0.0 \n", + "16 0.0 2.0 2.0 \n", + "17 0.0 2.0 4.0 \n", + "18 0.0 0.0 0.0 \n", + "19 0.0 0.0 0.0 \n", + "20 0.0 0.0 1.0 \n", + "21 0.0 0.0 0.0 \n", + "22 1.0 1.0 1.0 \n", + "23 0.0 0.0 0.0 \n", + "24 0.0 0.0 0.0 \n", + "25 0.0 0.0 0.0 \n", + "26 0.0 3.0 8.0 \n", + "27 0.0 0.0 0.0 \n", + "28 0.0 0.0 0.0 \n", + "29 0.0 0.0 0.0 \n", + "... ... ... ... \n", + "994 0.0 0.0 1.0 \n", + "995 0.0 0.0 2.0 \n", + "996 0.0 0.0 0.0 \n", + "997 0.0 0.0 0.0 \n", + "998 0.0 0.0 0.0 \n", + "999 0.0 0.0 0.0 \n", + "1000 2.0 14.0 22.0 \n", + "1001 0.0 0.0 0.0 \n", + "1002 0.0 0.0 1.0 \n", + "1003 0.0 0.0 0.0 \n", + "1004 1.0 2.0 3.0 \n", + "1005 0.0 0.0 0.0 \n", + "1006 0.0 0.0 1.0 \n", + "1007 0.0 0.0 0.0 \n", + "1008 0.0 0.0 1.0 \n", + "1009 0.0 0.0 0.0 \n", + "1010 0.0 0.0 0.0 \n", + "1011 0.0 0.0 0.0 \n", + "1012 0.0 0.0 0.0 \n", + "1013 0.0 0.0 0.0 \n", + "1014 0.0 6.0 39.0 \n", + "1015 0.0 1.0 1.0 \n", + "1016 0.0 0.0 0.0 \n", + "1017 0.0 0.0 0.0 \n", + "1018 0.0 0.0 1.0 \n", + "1019 0.0 2.0 6.0 \n", + "1020 0.0 0.0 0.0 \n", + "1021 0.0 1.0 5.0 \n", + "1022 0.0 0.0 0.0 \n", + "1023 0.0 0.0 0.0 \n", "\n", - "[132563 rows x 6 columns]" + "[1024 rows x 7 columns]" ] }, - "execution_count": 121, + "execution_count": 153, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "def outliers(t, name):\n", - " def outliers_(x):\n", - " return np.sum(x >= t)\n", - " outliers_.__name__ = 'outliers%s' % name\n", - " return outliers_\n", - "\n", - "t1 = milan_data[metric].quantile(0.99)\n", - "t5 = milan_data[metric].quantile(0.95)\n", - "t10 = milan_data[metric].quantile(0.90)\n", - "milan_oracle = milan_data.groupby([\"Grid\", \"Country\"]).agg({metric: [\n", + "t1 = wiki_data[metric].quantile(0.99)\n", + "t5 = wiki_data[metric].quantile(0.95)\n", + "t10 = wiki_data[metric].quantile(0.90)\n", + "wiki_oracle = wiki_data.groupby(attributes).agg({metric: [\n", " 'count',\n", " outliers(t1, \"1\"),\n", " outliers(t5, \"5\"),\n", " outliers(t10, \"10\")\n", "]}).reset_index(col_level=1)\n", - "milan_oracle.columns = milan_oracle.columns.get_level_values(1)\n", - "milan_oracle" + "wiki_oracle.columns = wiki_oracle.columns.get_level_values(1)\n", + "wiki_oracle" ] }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 154, "metadata": { "collapsed": true }, "outputs": [], "source": [ - "milan_oracle.to_csv('lib/src/test/resources/milan_oracle_cubed.csv')" + "wiki_oracle.to_csv('lib/src/test/resources/wiki_oracle_cubed.csv')" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "collapsed": true + }, "source": [ - "# Wikipedia" + "# Big Wiki" ] }, { "cell_type": "code", - "execution_count": 147, - "metadata": {}, + "execution_count": 206, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ - "wiki_data = pd.read_json('~/Downloads/wikipedia-2015-09-12', lines=True)" + "big_wiki_data = pd.read_csv('~/Downloads/wiki-10M.csv')" ] }, { "cell_type": "code", - "execution_count": 148, + "execution_count": 170, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "comment 2672206\n", + "isNew 2\n", + "isUnpatrolled 2\n", + "commentLength 317\n", + "deltaBucket 1765\n", + "regionName 2290\n", + "namespace 1005\n", + "isMinor 2\n", + "channel 53\n", + "added 27858\n", + "isRobot 2\n", + "deleted 501\n", + "countryIsoCode 219\n", + "__time 10213634\n", + "user 531887\n", + "delta 28358\n", + "regionIsoCode 1131\n", + "count 1\n", + "countryName 220\n", + "metroCode 209\n", + "cityName 26859\n", + "flags 11\n", + "diffUrl 10209896\n", + "isAnonymous 2\n", + "page 5448626\n", + "dtype: int64" + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "big_wiki_data.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 215, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ - "metric = \"delta\"\n", - "attributes = [\"channel\", \"countryName\", \"namespace\"]\n", - "wiki_data = wiki_data[attributes + [metric]]\n", - "wiki_data = wiki_data[(np.isfinite(wiki_data[metric])) & (wiki_data[metric] != 0)]\n", - "wiki_data[metric] = np.log(np.absolute(wiki_data[metric]))" + "metric = \"added\"\n", + "attributes = [\"channel\", \"namespace\"]\n", + "big_wiki_data = big_wiki_data.set_index(pd.DatetimeIndex(big_wiki_data['__time']))\n", + "big_wiki_data = big_wiki_data[attributes + [metric]]\n", + "big_wiki_data = big_wiki_data[(np.isfinite(big_wiki_data[metric])) & (big_wiki_data[metric] > 0)]" ] }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 216, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
channelnamespaceadded
__time
2018-01-08 00:00:01.258#ru.wikipediaMediaWiki6.0
2018-01-08 00:00:01.460#pl.wikipediaWikipedysta1.0
2018-01-08 00:00:02.143#en.wikipediaTalk23.0
2018-01-08 00:00:02.179#en.wikipediaMain9.0
2018-01-08 00:00:02.253#ceb.wikipediaMain8.0
\n", + "
" + ], "text/plain": [ - "" + " channel namespace added\n", + "__time \n", + "2018-01-08 00:00:01.258 #ru.wikipedia MediaWiki 6.0\n", + "2018-01-08 00:00:01.460 #pl.wikipedia Wikipedysta 1.0\n", + "2018-01-08 00:00:02.143 #en.wikipedia Talk 23.0\n", + "2018-01-08 00:00:02.179 #en.wikipedia Main 9.0\n", + "2018-01-08 00:00:02.253 #ceb.wikipedia Main 8.0" ] }, - "execution_count": 149, + "execution_count": 216, "metadata": {}, "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFf1JREFUeJzt3X+w3XWd3/Hna4msLK4GVnuHJkzDjBkdFqriHYi13bnK\nigEdwx+ug0MlWGr+EK3bYUZDOx2m/uiw03ZdaV07GckStlSWYXXICBoz0TNOZ4oCakFAyy2iJA3g\nGn5spKvN7rt/nE/wmO+9ueee3JtzLnk+Zs7c7/f9/Xy+9/PhhPO63x/nnFQVkiQN+o1xD0CSNHkM\nB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6Vo17AKN65StfWevWrRup789//nNO\nPfXUpR3QceYcJoNzmAzOYTj33XffX1XVq4Zpu2LDYd26ddx7770j9e31eszMzCztgI4z5zAZnMNk\ncA7DSfLjYdt6WkmS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktSxYt8hraW3\nbuudLyw/dv07xjgSSePmkYMkqcNwkCR1GA6SpI6hwiHJ6iS3J/lBkoeTvCnJ6Ul2J3mk/TyttU2S\nG5LMJrk/yXkD+9nc2j+SZPNA/Y1JHmh9bkiSpZ+qJGlYwx45fAb4alW9Fngd8DCwFdhTVeuBPW0d\n4GJgfXtsAT4HkOR04DrgAuB84LrDgdLafGCg38Zjm5Yk6VgsGA5JXgH8HnAjQFX9sqqeATYBO1qz\nHcClbXkTcHP13Q2sTnIG8HZgd1UdqKqngd3Axrbt5VV1d1UVcPPAviRJYzDMkcNZwE+BP0vy3SSf\nT3IqMFVV+1ubJ4CptrwGeHyg/95WO1p97xx1SdKYDPM+h1XAecCHq+pbST7Dr04hAVBVlaSWY4CD\nkmyhf6qKqakper3eSPs5ePDgyH0nxXLM4ZpzD72wfDz++/g8TAbnMBkmbQ7DhMNeYG9Vfaut304/\nHJ5MckZV7W+nhp5q2/cBZw70X9tq+4CZI+q9Vl87R/uOqtoGbAOYnp6uUb9Sz68UnNuVg2+Cu3xp\n9z0Xn4fJ4Bwmw6TNYcHTSlX1BPB4kte00oXAQ8BO4PAdR5uBO9ryTuCKdtfSBuDZdvppF3BRktPa\nheiLgF1t23NJNrS7lK4Y2JckaQyG/fiMDwO3JDkZeBR4P/1guS3JVcCPgfe0tncBlwCzwPOtLVV1\nIMkngHtau49X1YG2/EHgJuAU4CvtIUkak6HCoaq+B0zPsenCOdoWcPU8+9kObJ+jfi9wzjBjkSQt\nP98hLUnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1\nGA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUsdQ\n4ZDksSQPJPlekntb7fQku5M80n6e1upJckOS2ST3JzlvYD+bW/tHkmweqL+x7X+29c1ST1SSNLzF\nHDm8papeX1XTbX0rsKeq1gN72jrAxcD69tgCfA76YQJcB1wAnA9cdzhQWpsPDPTbOPKMJEnH7FhO\nK20CdrTlHcClA/Wbq+9uYHWSM4C3A7ur6kBVPQ3sBja2bS+vqrurqoCbB/YlSRqDYcOhgK8luS/J\nllabqqr9bfkJYKotrwEeH+i7t9WOVt87R12SNCarhmz3j6tqX5K/B+xO8oPBjVVVSWrph/frWjBt\nAZiamqLX6420n4MHD47cd1IsxxyuOffQC8vH47+Pz8NkcA6TYdLmMFQ4VNW+9vOpJF+if83gySRn\nVNX+dmroqdZ8H3DmQPe1rbYPmDmi3mv1tXO0n2sc24BtANPT0zUzMzNXswX1ej1G7TsplmMOV269\n84Xlxy5f2n3PxedhMjiHyTBpc1jwtFKSU5P89uFl4CLg+8BO4PAdR5uBO9ryTuCKdtfSBuDZdvpp\nF3BRktPaheiLgF1t23NJNrS7lK4Y2JckaQyGOXKYAr7U7i5dBfy3qvpqknuA25JcBfwYeE9rfxdw\nCTALPA+8H6CqDiT5BHBPa/fxqjrQlj8I3AScAnylPSRJY7JgOFTVo8Dr5qj/DLhwjnoBV8+zr+3A\n9jnq9wLnDDFeSdJx4DukJUkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaD\nJKnDcJAkdQz7fQ7SUa0b/Ljv698xxpFIWgoeOUiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6S\npA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHUOHQ5KTknw3yZfb+llJvpVkNslfJDm51X+zrc+27esG\n9nFtq/8wydsH6htbbTbJ1qWbniRpFIs5cvgI8PDA+h8Bn66qVwNPA1e1+lXA063+6daOJGcDlwG/\nC2wE/rQFzknAZ4GLgbOB97a2kqQxGSockqwF3gF8vq0HeCtwe2uyA7i0LW9q67TtF7b2m4Bbq+oX\nVfUjYBY4vz1mq+rRqvolcGtrK0kak2GPHP4E+Cjwd239d4BnqupQW98LrGnLa4DHAdr2Z1v7F+pH\n9JmvLkkakwW/7CfJO4Gnquq+JDPLP6SjjmULsAVgamqKXq830n4OHjw4ct9JsRxzuObcQy8sL3bf\no/T1eZgMzmEyTNochvkmuDcD70pyCfBS4OXAZ4DVSVa1o4O1wL7Wfh9wJrA3ySrgFcDPBuqHDfaZ\nr/5rqmobsA1genq6ZmZmhhh+V6/XY9S+k2I55nDl4Le5Xb64fY/S1+dhMjiHyTBpc1jwtFJVXVtV\na6tqHf0Lyl+vqsuBbwDvbs02A3e05Z1tnbb961VVrX5Zu5vpLGA98G3gHmB9u/vp5PY7di7J7CRJ\nIzmW75D+GHBrkk8C3wVubPUbgT9PMgscoP9iT1U9mOQ24CHgEHB1Vf0tQJIPAbuAk4DtVfXgMYxL\nknSMFhUOVdUDem35Ufp3Gh3Z5m+AP5in/6eAT81Rvwu4azFjkSQtH98hLUnqMBwkSR2GgySpw3CQ\nJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lS\nh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqWDAckrw0ybeT/M8kDyb5\nt61+VpJvJZlN8hdJTm7132zrs237uoF9XdvqP0zy9oH6xlabTbJ16acpSVqMYY4cfgG8tapeB7we\n2JhkA/BHwKer6tXA08BVrf1VwNOt/unWjiRnA5cBvwtsBP40yUlJTgI+C1wMnA28t7WVJI3JguFQ\nfQfb6kvao4C3Are3+g7g0ra8qa3Ttl+YJK1+a1X9oqp+BMwC57fHbFU9WlW/BG5tbSVJY7JqmEbt\nr/v7gFfT/yv/fwPPVNWh1mQvsKYtrwEeB6iqQ0meBX6n1e8e2O1gn8ePqF8wzzi2AFsApqam6PV6\nwwy/4+DBgyP3nRTLMYdrzj30wvJi9z1KX5+HyeAcJsOkzWGocKiqvwVen2Q18CXgtcs6qvnHsQ3Y\nBjA9PV0zMzMj7afX6zFq30mxHHO4cuudLyw/dvni9j1KX5+HyeAcJsOkzWFRdytV1TPAN4A3AauT\nHA6XtcC+trwPOBOgbX8F8LPB+hF95qtLksZkmLuVXtWOGEhyCvA24GH6IfHu1mwzcEdb3tnWadu/\nXlXV6pe1u5nOAtYD3wbuAda3u59Opn/ReudSTE6SNJphTiudAexo1x1+A7itqr6c5CHg1iSfBL4L\n3Nja3wj8eZJZ4AD9F3uq6sEktwEPAYeAq9vpKpJ8CNgFnARsr6oHl2yGkqRFWzAcqup+4A1z1B+l\nf6fRkfW/Af5gnn19CvjUHPW7gLuGGK8k6TjwHdKSpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaD\nJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiS\nOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1LBgOSc5M8o0kDyV5MMlHWv30JLuTPNJ+ntbqSXJDktkk\n9yc5b2Bfm1v7R5JsHqi/MckDrc8NSbIck5UkDWeYI4dDwDVVdTawAbg6ydnAVmBPVa0H9rR1gIuB\n9e2xBfgc9MMEuA64ADgfuO5woLQ2Hxjot/HYpyZJGtWC4VBV+6vqO235r4GHgTXAJmBHa7YDuLQt\nbwJurr67gdVJzgDeDuyuqgNV9TSwG9jYtr28qu6uqgJuHtiXJGkM0n89HrJxsg74JnAO8JOqWt3q\nAZ6uqtVJvgxcX1X/vW3bA3wMmAFeWlWfbPV/A/xfoNfa/36r/xPgY1X1zjl+/xb6RyNMTU298dZb\nb138jIGDBw/yspe9bKS+k2I55vDAvmdfWD53zSuWva/Pw2RwDpPheMzhLW95y31VNT1M21XD7jTJ\ny4C/BP6wqp4bvCxQVZVk+JQZUVVtA7YBTE9P18zMzEj76fV6jNp3UizHHK7ceucLy49dvrh9j9LX\n52EyOIfJMGlzGOpupSQvoR8Mt1TVF1v5yXZKiPbzqVbfB5w50H1tqx2tvnaOuiRpTIa5WynAjcDD\nVfXHA5t2AofvONoM3DFQv6LdtbQBeLaq9gO7gIuSnNYuRF8E7Grbnkuyof2uKwb2JUkag2FOK70Z\neB/wQJLvtdq/Aq4HbktyFfBj4D1t213AJcAs8DzwfoCqOpDkE8A9rd3Hq+pAW/4gcBNwCvCV9pAk\njcmC4dAuLM/3voML52hfwNXz7Gs7sH2O+r30L3JLkiaA75CWJHUYDpKkDsNBktRhOEiSOgwHSVKH\n4SBJ6jAcJEkdJ2Q4PLDvWdZtvZN1A58HJEn6lRMyHCRJR2c4SJI6DAdJUofhIEnqMBwkSR2GgySp\nw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6SpI4FwyHJ9iRPJfn+\nQO30JLuTPNJ+ntbqSXJDktkk9yc5b6DP5tb+kSSbB+pvTPJA63NDkiz1JHV8Hf4iJb9MSVq5hjly\nuAnYeERtK7CnqtYDe9o6wMXA+vbYAnwO+mECXAdcAJwPXHc4UFqbDwz0O/J3SZKOs1ULNaiqbyZZ\nd0R5EzDTlncAPeBjrX5zVRVwd5LVSc5obXdX1QGAJLuBjUl6wMur6u5Wvxm4FPjKsUxqOQ3+NfzY\n9e8Y40gkafmMes1hqqr2t+UngKm2vAZ4fKDd3lY7Wn3vHHVJ0hgteOSwkKqqJLUUg1lIki30T1cx\nNTVFr9cbaT9Tp8A15x4CWPQ+Dvcbpe9SOnjw4JL//mOZ22DfQUfbz3LM4XhzDpPBOSy9UcPhySRn\nVNX+dtroqVbfB5w50G5tq+3jV6ehDtd7rb52jvZzqqptwDaA6enpmpmZma/pUf2nW+7gPz7Qn/pj\nly9uH1cOnlZaZN+l1Ov1GHX+8zmWuV05z8Xno+1nOeZwvDmHyeAclt6op5V2AofvONoM3DFQv6Ld\ntbQBeLadftoFXJTktHYh+iJgV9v2XJIN7S6lKwb2JUkakwWPHJJ8gf5f/a9Mspf+XUfXA7cluQr4\nMfCe1vwu4BJgFngeeD9AVR1I8gngntbu44cvTgMfpH9H1Cn0L0RP7MXolcAL5pKWwjB3K713nk0X\nztG2gKvn2c92YPsc9XuBcxYahyaP72OQXrx8h7QkqcNwkCR1HPOtrBqN1wYkTTLDQceNgSitHJ5W\nWoHWbb2TB/Y9uyI+3G6+D+EbnIOkyeORg+bkX/nSic1wOAEt9oXfoJBOPIbDMlhJL6ae1pE0F8NB\ni2KYSCcGw2GCLeURyCS/qK+kIy3pROHdSpKkDo8cNFHmO4pYrqMoj1SkuRkOmljznQobJkCO3CZp\ncQwHrWhHu5YyyddZpElnOEwAT3OMzzAB4nOiE5Hh8CLgX8jLa77/voaGXsy8W0kakZ8PpRczjxxW\niFFegHzROn48NagXG48cJEkdHjlIS8yjCL0YGA7SMvJuKK1UhoM0Zou9NmSY6HgwHKQV5nh8xIhk\nOEgrmHekablMTDgk2Qh8BjgJ+HxVXT/mIUkr1tGOIpbqCMMjlRe3iQiHJCcBnwXeBuwF7kmys6oe\nGu/IpJVvoc+fuubcQ1y59U5f4PVrJiIcgPOB2ap6FCDJrcAmwHCQjhNPUWnQpITDGuDxgfW9wAVj\nGoukRVqOYPFIZrxSVeMeA0neDWysqn/e1t8HXFBVHzqi3RZgS1t9DfDDEX/lK4G/GrHvpHAOk8E5\nTAbnMJx/UFWvGqbhpBw57APOHFhf22q/pqq2AduO9Zclubeqpo91P+PkHCaDc5gMzmHpTcpnK90D\nrE9yVpKTgcuAnWMekySdsCbiyKGqDiX5ELCL/q2s26vqwTEPS5JOWBMRDgBVdRdw13H6dcd8amoC\nOIfJ4Bwmg3NYYhNxQVqSNFkm5ZqDJGmCnFDhkGRjkh8mmU2yddzjWawkZyb5RpKHkjyY5CPjHtOo\nkpyU5LtJvjzusYwqyeoktyf5QZKHk7xp3GNarCT/sv1b+n6SLyR56bjHtJAk25M8leT7A7XTk+xO\n8kj7edo4x7iQeebw79u/pfuTfCnJ6nGO8YQJh4GP6LgYOBt4b5KzxzuqRTsEXFNVZwMbgKtX4BwO\n+wjw8LgHcYw+A3y1ql4LvI4VNp8ka4B/AUxX1Tn0bwa5bLyjGspNwMYjaluBPVW1HtjT1ifZTXTn\nsBs4p6r+IfC/gGuP96AGnTDhwMBHdFTVL4HDH9GxYlTV/qr6Tlv+a/ovRmvGO6rFS7IWeAfw+XGP\nZVRJXgH8HnAjQFX9sqqeGe+oRrIKOCXJKuC3gP8z5vEsqKq+CRw4orwJ2NGWdwCXHtdBLdJcc6iq\nr1XVobZ6N/33e43NiRQOc31Ex4p7YT0syTrgDcC3xjuSkfwJ8FHg78Y9kGNwFvBT4M/a6bHPJzl1\n3INajKraB/wH4CfAfuDZqvraeEc1sqmq2t+WnwCmxjmYJfDPgK+McwAnUji8aCR5GfCXwB9W1XPj\nHs9iJHkn8FRV3TfusRyjVcB5wOeq6g3Az5n8Uxm/pp2X30Q/6P4+cGqSfzreUR276t+CuWJvw0zy\nr+mfQr5lnOM4kcJhqI/omHRJXkI/GG6pqi+OezwjeDPwriSP0T+199Yk/3W8QxrJXmBvVR0+crud\nflisJL8P/KiqflpV/w/4IvCPxjymUT2Z5AyA9vOpMY9nJEmuBN4JXF5jfp/BiRQOK/4jOpKE/jnu\nh6vqj8c9nlFU1bVVtbaq1tF/Dr5eVSvur9WqegJ4PMlrWulCVt5HzP8E2JDkt9q/rQtZYRfVB+wE\nNrflzcAdYxzLSNoXnn0UeFdVPT/u8Zww4dAu9Bz+iI6HgdtW4Ed0vBl4H/2/tr/XHpeMe1AnsA8D\ntyS5H3g98O/GPJ5FaUc9twPfAR6g/3owUe/SnUuSLwD/A3hNkr1JrgKuB96W5BH6R0QT/U2S88zh\nPwO/Dexu/2//l7GO0XdIS5KOdMIcOUiShmc4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKk\njv8PKv7S972dhRMAAAAASUVORK5CYII=\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" } ], "source": [ - "wiki_data[\"delta\"].hist(bins=100)" + "big_wiki_data.head()" ] }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 217, "metadata": {}, "outputs": [ { @@ -2104,531 +3087,252 @@ " \n", " \n", " channel\n", - " countryName\n", " namespace\n", - " min\n", - " max\n", - " m0\n", - " m1\n", - " m2\n", - " m3\n", - " m4\n", - " m5\n", - " m6\n", - " m7\n", - " m8\n", + " __time\n", + " count\n", + " outliers1\n", " \n", " \n", " \n", " \n", " 0\n", " #ar.wikipedia\n", - " Algeria\n", - " Main\n", - " 0.000000\n", - " 6.666957\n", - " 18.0\n", - " 57.644747\n", - " 234.884785\n", - " 1108.007978\n", - " 5842.083979\n", - " 3.325478e+04\n", - " 1.988641e+05\n", - " 1.226715e+06\n", - " 7.717880e+06\n", + " 16\n", + " 2018-01-15 05:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", " 1\n", " #ar.wikipedia\n", - " Algeria\n", - " نقاش\n", - " 5.267858\n", - " 5.267858\n", - " 1.0\n", - " 5.267858\n", - " 27.750330\n", - " 146.184800\n", - " 770.080792\n", - " 4.056676e+03\n", - " 2.137000e+04\n", - " 1.125741e+05\n", - " 5.930244e+05\n", + " Main\n", + " 2018-01-08 00:00:00\n", + " 149\n", + " 0.0\n", " \n", " \n", " 2\n", " #ar.wikipedia\n", - " Australia\n", " Main\n", - " 5.564520\n", - " 5.564520\n", - " 1.0\n", - " 5.564520\n", - " 30.963887\n", - " 172.299183\n", - " 958.762321\n", - " 5.335052e+03\n", - " 2.968701e+04\n", - " 1.651940e+05\n", - " 9.192252e+05\n", + " 2018-01-08 01:00:00\n", + " 185\n", + " 0.0\n", " \n", " \n", " 3\n", " #ar.wikipedia\n", - " Bahrain\n", " Main\n", - " 3.784190\n", - " 3.784190\n", - " 1.0\n", - " 3.784190\n", - " 14.320091\n", - " 54.189941\n", - " 205.065012\n", - " 7.760049e+02\n", - " 2.936550e+03\n", - " 1.111246e+04\n", - " 4.205166e+04\n", + " 2018-01-08 02:00:00\n", + " 105\n", + " 0.0\n", " \n", " \n", " 4\n", " #ar.wikipedia\n", - " Belgium\n", " Main\n", - " 5.459586\n", - " 5.459586\n", - " 1.0\n", - " 5.459586\n", - " 29.807074\n", - " 162.734269\n", - " 888.461660\n", - " 4.850632e+03\n", - " 2.648244e+04\n", - " 1.445832e+05\n", - " 7.893641e+05\n", + " 2018-01-08 03:00:00\n", + " 64\n", + " 0.0\n", " \n", " \n", " 5\n", " #ar.wikipedia\n", - " Denmark\n", - " ويكيبيديا\n", - " 6.030685\n", - " 6.030685\n", - " 1.0\n", - " 6.030685\n", - " 36.369165\n", - " 219.330986\n", - " 1322.716142\n", - " 7.976885e+03\n", - " 4.810608e+04\n", - " 2.901126e+05\n", - " 1.749578e+06\n", - " \n", - " \n", - " 6\n", - " #ar.wikipedia\n", - " Egypt\n", " Main\n", - " 0.693147\n", - " 8.664060\n", - " 28.0\n", - " 124.659483\n", - " 680.894125\n", - " 4142.018720\n", - " 26951.803294\n", - " 1.839678e+05\n", - " 1.302402e+06\n", - " 9.492862e+06\n", - " 7.087854e+07\n", + " 2018-01-08 04:00:00\n", + " 79\n", + " 0.0\n", + " \n", + " \n", + " 6\n", + " #ar.wikipedia\n", + " Main\n", + " 2018-01-08 05:00:00\n", + " 63\n", + " 0.0\n", " \n", " \n", " 7\n", " #ar.wikipedia\n", - " Hashemite Kingdom of Jordan\n", " Main\n", - " 0.693147\n", - " 5.164786\n", - " 8.0\n", - " 19.517211\n", - " 59.503557\n", - " 216.944287\n", - " 908.031706\n", - " 4.171502e+03\n", - " 2.025181e+04\n", - " 1.013195e+05\n", - " 5.148902e+05\n", + " 2018-01-08 06:00:00\n", + " 122\n", + " 0.0\n", " \n", " \n", " 8\n", " #ar.wikipedia\n", - " Hashemite Kingdom of Jordan\n", - " نقاش المستخدم\n", - " 4.127134\n", - " 4.127134\n", - " 1.0\n", - " 4.127134\n", - " 17.033238\n", - " 70.298463\n", - " 290.131205\n", - " 1.197410e+03\n", - " 4.941874e+03\n", - " 2.039578e+04\n", - " 8.417612e+04\n", + " Main\n", + " 2018-01-08 07:00:00\n", + " 185\n", + " 0.0\n", " \n", " \n", " 9\n", " #ar.wikipedia\n", - " Iran\n", " Main\n", - " 0.693147\n", - " 0.693147\n", - " 1.0\n", - " 0.693147\n", - " 0.480453\n", - " 0.333025\n", - " 0.230835\n", - " 1.600027e-01\n", - " 1.109054e-01\n", - " 7.687378e-02\n", - " 5.328484e-02\n", + " 2018-01-08 08:00:00\n", + " 166\n", + " 0.0\n", " \n", " \n", " 10\n", " #ar.wikipedia\n", - " Iraq\n", " Main\n", - " 0.000000\n", - " 6.520621\n", - " 19.0\n", - " 61.367666\n", - " 287.348181\n", - " 1524.935621\n", - " 8645.085403\n", - " 5.101748e+04\n", - " 3.091354e+05\n", - " 1.907096e+06\n", - " 1.191112e+07\n", + " 2018-01-08 09:00:00\n", + " 118\n", + " 0.0\n", " \n", " \n", " 11\n", " #ar.wikipedia\n", - " Iraq\n", - " نقاش\n", - " 5.926926\n", - " 6.278521\n", - " 2.0\n", - " 12.205447\n", - " 74.548283\n", - " 455.701992\n", - " 2787.931246\n", - " 1.707021e+04\n", - " 1.046042e+05\n", - " 6.415184e+05\n", - " 3.937453e+06\n", + " Main\n", + " 2018-01-08 10:00:00\n", + " 109\n", + " 0.0\n", " \n", " \n", " 12\n", " #ar.wikipedia\n", - " Israel\n", " Main\n", - " 0.000000\n", - " 5.442418\n", - " 4.0\n", - " 13.757495\n", - " 64.504332\n", - " 308.849582\n", - " 1507.522267\n", - " 7.485986e+03\n", - " 3.773624e+04\n", - " 1.926923e+05\n", - " 9.947230e+05\n", + " 2018-01-08 11:00:00\n", + " 148\n", + " 0.0\n", " \n", " \n", " 13\n", " #ar.wikipedia\n", - " Kuwait\n", " Main\n", - " 0.693147\n", - " 5.472271\n", - " 9.0\n", - " 27.385355\n", - " 112.181294\n", - " 525.630362\n", - " 2608.214237\n", - " 1.329461e+04\n", - " 6.875724e+04\n", - " 3.588217e+05\n", - " 1.884294e+06\n", + " 2018-01-08 12:00:00\n", + " 205\n", + " 0.0\n", " \n", " \n", " 14\n", " #ar.wikipedia\n", - " Kuwait\n", - " نقاش المستخدم\n", - " 6.272877\n", - " 6.272877\n", - " 1.0\n", - " 6.272877\n", - " 39.348986\n", - " 246.831349\n", - " 1548.342694\n", - " 9.712563e+03\n", - " 6.092571e+04\n", - " 3.821795e+05\n", - " 2.397365e+06\n", + " Main\n", + " 2018-01-08 13:00:00\n", + " 216\n", + " 0.0\n", " \n", " \n", " 15\n", " #ar.wikipedia\n", - " Lebanon\n", " Main\n", - " 5.236442\n", - " 5.996452\n", - " 2.0\n", - " 11.232894\n", - " 63.377762\n", - " 359.201990\n", - " 2044.811514\n", - " 1.169018e+04\n", - " 6.710735e+04\n", - " 3.867371e+05\n", - " 2.237002e+06\n", + " 2018-01-08 14:00:00\n", + " 207\n", + " 0.0\n", " \n", " \n", " 16\n", " #ar.wikipedia\n", - " Libya\n", " Main\n", - " 4.488636\n", - " 7.472501\n", - " 3.0\n", - " 19.227964\n", - " 128.792904\n", - " 891.425647\n", - " 6312.404206\n", - " 4.538466e+04\n", - " 3.295322e+05\n", - " 2.407739e+06\n", - " 1.766220e+07\n", + " 2018-01-08 15:00:00\n", + " 165\n", + " 0.0\n", " \n", " \n", " 17\n", " #ar.wikipedia\n", - " Morocco\n", " Main\n", - " 0.000000\n", - " 7.754053\n", - " 13.0\n", - " 45.491987\n", - " 239.320305\n", - " 1502.900431\n", - " 10204.694809\n", - " 7.174515e+04\n", - " 5.133805e+05\n", - " 3.712545e+06\n", - " 2.704703e+07\n", + " 2018-01-08 16:00:00\n", + " 126\n", + " 0.0\n", " \n", " \n", " 18\n", " #ar.wikipedia\n", - " Morocco\n", - " بوابة\n", - " 5.521461\n", - " 5.521461\n", - " 1.0\n", - " 5.521461\n", - " 30.486531\n", - " 168.330188\n", - " 929.428552\n", - " 5.131803e+03\n", - " 2.833505e+04\n", - " 1.564509e+05\n", - " 8.638374e+05\n", + " Main\n", + " 2018-01-08 17:00:00\n", + " 187\n", + " 0.0\n", " \n", " \n", " 19\n", " #ar.wikipedia\n", - " Morocco\n", - " قالب\n", - " 4.382027\n", - " 4.382027\n", - " 1.0\n", - " 4.382027\n", - " 19.202157\n", - " 84.144365\n", - " 368.722850\n", - " 1.615753e+03\n", - " 7.080274e+03\n", - " 3.102595e+04\n", - " 1.359565e+05\n", + " Main\n", + " 2018-01-08 18:00:00\n", + " 319\n", + " 0.0\n", " \n", " \n", " 20\n", " #ar.wikipedia\n", - " Morocco\n", - " نقاش\n", - " 6.666957\n", - " 6.666957\n", - " 1.0\n", - " 6.666957\n", - " 44.448313\n", - " 296.334981\n", - " 1975.652517\n", - " 1.317159e+04\n", - " 8.781442e+04\n", - " 5.854550e+05\n", - " 3.903203e+06\n", + " Main\n", + " 2018-01-08 19:00:00\n", + " 169\n", + " 0.0\n", " \n", " \n", " 21\n", " #ar.wikipedia\n", - " Morocco\n", - " نقاش المستخدم\n", - " 5.645447\n", - " 5.645447\n", - " 1.0\n", - " 5.645447\n", - " 31.871071\n", - " 179.926437\n", - " 1015.765146\n", - " 5.734448e+03\n", - " 3.237352e+04\n", - " 1.827630e+05\n", - " 1.031779e+06\n", + " Main\n", + " 2018-01-08 20:00:00\n", + " 166\n", + " 0.0\n", " \n", " \n", " 22\n", " #ar.wikipedia\n", - " Morocco\n", - " ويكيبيديا\n", - " 8.711937\n", - " 8.711937\n", - " 1.0\n", - " 8.711937\n", - " 75.897851\n", - " 661.217316\n", - " 5760.483781\n", - " 5.018497e+04\n", - " 4.372083e+05\n", - " 3.808932e+06\n", - " 3.318317e+07\n", + " Main\n", + " 2018-01-08 21:00:00\n", + " 166\n", + " 0.0\n", " \n", " \n", " 23\n", " #ar.wikipedia\n", - " Oman\n", " Main\n", - " 1.386294\n", - " 3.806662\n", - " 2.0\n", - " 5.192957\n", - " 16.412491\n", - " 57.825323\n", - " 213.673148\n", - " 8.044423e+02\n", - " 3.049848e+03\n", - " 1.159256e+04\n", - " 4.410515e+04\n", + " 2018-01-08 22:00:00\n", + " 116\n", + " 0.0\n", " \n", " \n", " 24\n", " #ar.wikipedia\n", - " Palestine\n", " Main\n", - " 0.000000\n", - " 3.465736\n", - " 4.0\n", - " 6.931472\n", - " 24.022651\n", - " 83.256163\n", - " 288.543873\n", - " 1.000017e+03\n", - " 3.465794e+03\n", - " 1.201153e+04\n", - " 4.162878e+04\n", + " 2018-01-08 23:00:00\n", + " 133\n", + " 0.0\n", " \n", " \n", " 25\n", " #ar.wikipedia\n", - " Qatar\n", " Main\n", - " 2.197225\n", - " 4.804021\n", - " 3.0\n", - " 9.486152\n", - " 34.081175\n", - " 136.821624\n", - " 594.057905\n", - " 2.704686e+03\n", - " 1.264015e+04\n", - " 5.988422e+04\n", - " 2.856838e+05\n", + " 2018-01-09 00:00:00\n", + " 76\n", + " 0.0\n", " \n", " \n", " 26\n", " #ar.wikipedia\n", - " Saudi Arabia\n", " Main\n", - " 0.000000\n", - " 8.311398\n", - " 97.0\n", - " 308.790738\n", - " 1375.600910\n", - " 7038.339584\n", - " 39448.036322\n", - " 2.366367e+05\n", - " 1.497962e+06\n", - " 9.907004e+06\n", - " 6.793387e+07\n", + " 2018-01-09 01:00:00\n", + " 75\n", + " 0.0\n", " \n", " \n", " 27\n", " #ar.wikipedia\n", - " Saudi Arabia\n", - " مستخدم\n", - " 3.367296\n", - " 3.367296\n", - " 1.0\n", - " 3.367296\n", - " 11.338681\n", - " 38.180694\n", - " 128.565692\n", - " 4.329187e+02\n", - " 1.457765e+03\n", - " 4.908727e+03\n", - " 1.652914e+04\n", + " Main\n", + " 2018-01-09 02:00:00\n", + " 42\n", + " 0.0\n", " \n", " \n", " 28\n", " #ar.wikipedia\n", - " Saudi Arabia\n", - " ويكيبيديا\n", - " 3.610918\n", - " 3.610918\n", - " 1.0\n", - " 3.610918\n", - " 13.038728\n", - " 47.081777\n", - " 170.008432\n", - " 6.138865e+02\n", - " 2.216694e+03\n", - " 8.004299e+03\n", - " 2.890287e+04\n", + " Main\n", + " 2018-01-09 03:00:00\n", + " 38\n", + " 0.0\n", " \n", " \n", " 29\n", " #ar.wikipedia\n", - " Sudan\n", " Main\n", - " 4.477337\n", - " 4.976734\n", - " 2.0\n", - " 9.454071\n", - " 44.814424\n", - " 213.018272\n", - " 1015.311782\n", - " 4.852247e+03\n", - " 2.324979e+04\n", - " 1.116849e+05\n", - " 5.378129e+05\n", + " 2018-01-09 04:00:00\n", + " 86\n", + " 0.0\n", " \n", " \n", " ...\n", @@ -2637,761 +3341,368 @@ " ...\n", " ...\n", " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 994\n", - " #zh.wikipedia\n", - " China\n", - " Main\n", - " 1.098612\n", - " 6.689599\n", - " 22.0\n", - " 80.418326\n", - " 342.517685\n", - " 1597.834083\n", - " 7929.112435\n", - " 4.128033e+04\n", - " 2.237338e+05\n", - " 1.255615e+06\n", - " 7.263910e+06\n", " \n", " \n", - " 995\n", - " #zh.wikipedia\n", - " China\n", - " Wikipedia\n", - " 1.609438\n", - " 6.045005\n", - " 4.0\n", - " 19.648868\n", - " 111.067428\n", - " 656.498970\n", - " 3929.637204\n", - " 2.360291e+04\n", - " 1.419026e+05\n", - " 8.533656e+05\n", - " 5.132418e+06\n", - " \n", - " \n", - " 996\n", + " 119253\n", " #zh.wikipedia\n", - " Czech Republic\n", - " Main\n", - " 1.098612\n", - " 4.859812\n", - " 3.0\n", - " 9.569343\n", - " 37.863454\n", - " 163.185710\n", - " 729.264530\n", - " 3.326287e+03\n", - " 1.539243e+04\n", - " 7.202931e+04\n", - " 3.400451e+05\n", + " 模块\n", + " 2018-01-24 04:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 997\n", + " 119254\n", " #zh.wikipedia\n", - " Finland\n", - " Main\n", - " 3.332205\n", - " 3.332205\n", - " 1.0\n", - " 3.332205\n", - " 11.103587\n", - " 36.999422\n", - " 123.289642\n", - " 4.108263e+02\n", - " 1.368957e+03\n", - " 4.561646e+03\n", - " 1.520034e+04\n", + " 模块\n", + " 2018-01-24 08:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 998\n", + " 119255\n", " #zh.wikipedia\n", - " France\n", - " Main\n", - " 3.663562\n", - " 5.123964\n", - " 2.0\n", - " 8.787526\n", - " 39.676691\n", - " 183.700876\n", - " 869.466985\n", - " 4.192038e+03\n", - " 2.051605e+04\n", - " 1.015925e+05\n", - " 5.076205e+05\n", + " 模块\n", + " 2018-01-24 13:00:00\n", + " 2\n", + " 0.0\n", " \n", " \n", - " 999\n", + " 119256\n", " #zh.wikipedia\n", - " Germany\n", - " Main\n", - " 3.135494\n", - " 3.135494\n", - " 1.0\n", - " 3.135494\n", - " 9.831324\n", - " 30.826059\n", - " 96.654931\n", - " 3.030610e+02\n", - " 9.502459e+02\n", - " 2.979491e+03\n", - " 9.342176e+03\n", + " 模块\n", + " 2018-01-24 15:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 1000\n", + " 119257\n", " #zh.wikipedia\n", - " Hong Kong\n", - " Main\n", - " 0.000000\n", - " 9.431322\n", - " 440.0\n", - " 1541.199742\n", - " 6864.103775\n", - " 34431.390925\n", - " 189571.354539\n", - " 1.133382e+06\n", - " 7.309490e+06\n", - " 5.048145e+07\n", - " 3.699213e+08\n", + " 模块\n", + " 2018-01-25 00:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 1001\n", + " 119258\n", " #zh.wikipedia\n", - " Hong Kong\n", - " Template\n", - " 2.079442\n", - " 3.583519\n", - " 2.0\n", - " 5.662960\n", - " 17.165685\n", - " 55.009811\n", - " 183.604539\n", - " 6.298276e+02\n", - " 2.198520e+03\n", - " 7.756832e+03\n", - " 2.754389e+04\n", + " 模块\n", + " 2018-01-25 11:00:00\n", + " 3\n", + " 0.0\n", " \n", " \n", - " 1002\n", + " 119259\n", " #zh.wikipedia\n", - " Israel\n", - " Main\n", - " 6.265301\n", - " 6.265301\n", - " 1.0\n", - " 6.265301\n", - " 39.253999\n", - " 245.938129\n", - " 1540.876460\n", - " 9.654055e+03\n", - " 6.048556e+04\n", - " 3.789603e+05\n", - " 2.374300e+06\n", + " 模块\n", + " 2018-01-25 12:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 1003\n", + " 119260\n", " #zh.wikipedia\n", - " Italy\n", - " Main\n", - " 5.247024\n", - " 5.247024\n", - " 1.0\n", - " 5.247024\n", - " 27.531262\n", - " 144.457192\n", - " 757.970366\n", - " 3.977089e+03\n", - " 2.086788e+04\n", - " 1.094943e+05\n", - " 5.745191e+05\n", + " 模块\n", + " 2018-01-25 13:00:00\n", + " 2\n", + " 0.0\n", " \n", " \n", - " 1004\n", + " 119261\n", " #zh.wikipedia\n", - " Japan\n", - " Main\n", - " 0.000000\n", - " 8.805225\n", - " 12.0\n", - " 51.071233\n", - " 279.236525\n", - " 1743.367154\n", - " 12017.873604\n", - " 8.883793e+04\n", - " 6.890841e+05\n", - " 5.526233e+06\n", - " 4.537615e+07\n", + " 模块\n", + " 2018-01-25 14:00:00\n", + " 3\n", + " 0.0\n", " \n", " \n", - " 1005\n", + " 119262\n", " #zh.wikipedia\n", - " Japan\n", - " Talk\n", - " 3.806662\n", - " 3.806662\n", - " 1.0\n", - " 3.806662\n", - " 14.490679\n", - " 55.161125\n", - " 209.979787\n", - " 7.993222e+02\n", - " 3.042750e+03\n", - " 1.158272e+04\n", - " 4.409151e+04\n", + " 模块\n", + " 2018-01-25 15:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 1006\n", + " 119263\n", " #zh.wikipedia\n", - " Macao\n", - " Main\n", - " 1.386294\n", - " 6.295266\n", - " 12.0\n", - " 48.543764\n", - " 228.344849\n", - " 1169.963335\n", - " 6287.631418\n", - " 3.476625e+04\n", - " 1.958443e+05\n", - " 1.118023e+06\n", - " 6.448414e+06\n", + " 模块\n", + " 2018-01-25 18:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 1007\n", + " 119264\n", " #zh.wikipedia\n", - " Macao\n", - " Wikipedia\n", - " 5.370638\n", - " 5.370638\n", - " 1.0\n", - " 5.370638\n", - " 28.843753\n", - " 154.909356\n", - " 831.962077\n", - " 4.468167e+03\n", - " 2.399691e+04\n", - " 1.288787e+05\n", - " 6.921609e+05\n", + " 模块\n", + " 2018-01-25 19:00:00\n", + " 2\n", + " 0.0\n", " \n", " \n", - " 1008\n", + " 119265\n", " #zh.wikipedia\n", - " Malaysia\n", - " Main\n", - " 0.000000\n", - " 6.109248\n", - " 45.0\n", - " 127.018209\n", - " 459.199656\n", - " 1821.140937\n", - " 7705.150344\n", - " 3.435583e+04\n", - " 1.604540e+05\n", - " 7.817490e+05\n", - " 3.958450e+06\n", + " 模块\n", + " 2018-01-25 20:00:00\n", + " 2\n", + " 0.0\n", " \n", " \n", - " 1009\n", + " 119266\n", " #zh.wikipedia\n", - " New Zealand\n", - " Main\n", - " 2.484907\n", - " 3.951244\n", - " 2.0\n", - " 6.436150\n", - " 21.787088\n", - " 77.031814\n", - " 281.872426\n", - " 1.057839e+03\n", - " 4.040852e+03\n", - " 1.562117e+04\n", - " 6.086522e+04\n", + " 模块\n", + " 2018-01-26 08:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 1010\n", + " 119267\n", " #zh.wikipedia\n", - " Portugal\n", - " Main\n", - " 2.484907\n", - " 5.187386\n", - " 2.0\n", - " 7.672292\n", - " 33.083733\n", - " 154.930922\n", - " 762.220421\n", - " 3.850892e+03\n", - " 1.972002e+04\n", - " 1.016591e+05\n", - " 5.257640e+05\n", + " 模块\n", + " 2018-01-26 09:00:00\n", + " 2\n", + " 0.0\n", " \n", " \n", - " 1011\n", + " 119268\n", " #zh.wikipedia\n", - " Republic of Korea\n", - " Main\n", - " 3.091042\n", - " 3.091042\n", - " 1.0\n", - " 3.091042\n", - " 9.554543\n", - " 29.533499\n", - " 91.289301\n", - " 2.821791e+02\n", - " 8.722276e+02\n", - " 2.696093e+03\n", - " 8.333736e+03\n", + " 模块\n", + " 2018-01-26 16:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 1012\n", + " 119269\n", " #zh.wikipedia\n", - " Singapore\n", - " Main\n", - " 1.386294\n", - " 4.983607\n", - " 12.0\n", - " 32.615067\n", - " 106.962952\n", - " 399.085420\n", - " 1608.995005\n", - " 6.796709e+03\n", - " 2.960535e+04\n", - " 1.318929e+05\n", - " 5.982321e+05\n", + " 模块\n", + " 2018-01-26 17:00:00\n", + " 3\n", + " 0.0\n", " \n", " \n", - " 1013\n", + " 119270\n", " #zh.wikipedia\n", - " Taiwan\n", - " File\n", - " 0.693147\n", - " 5.398163\n", - " 3.0\n", - " 7.189922\n", - " 30.827563\n", - " 158.962321\n", - " 850.836518\n", - " 4.585605e+03\n", - " 2.474621e+04\n", - " 1.335760e+05\n", - " 7.210561e+05\n", + " 模块\n", + " 2018-01-27 09:00:00\n", + " 2\n", + " 0.0\n", " \n", " \n", - " 1014\n", + " 119271\n", " #zh.wikipedia\n", - " Taiwan\n", - " Main\n", - " 0.000000\n", - " 8.348775\n", - " 659.0\n", - " 2209.913454\n", - " 9248.880591\n", - " 43555.882096\n", - " 222371.087542\n", - " 1.205020e+06\n", - " 6.842703e+06\n", - " 4.039094e+07\n", - " 2.465605e+08\n", + " 模块\n", + " 2018-01-27 11:00:00\n", + " 2\n", + " 0.0\n", " \n", " \n", - " 1015\n", + " 119272\n", " #zh.wikipedia\n", - " Taiwan\n", - " Talk\n", - " 2.564949\n", - " 6.984716\n", - " 8.0\n", - " 33.299343\n", - " 158.796546\n", - " 848.436567\n", - " 4912.187478\n", - " 2.994611e+04\n", - " 1.885966e+05\n", - " 1.213369e+06\n", - " 7.924473e+06\n", + " 模块\n", + " 2018-01-27 12:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 1016\n", + " 119273\n", " #zh.wikipedia\n", - " Taiwan\n", - " Template\n", - " 2.302585\n", - " 4.127134\n", - " 2.0\n", - " 6.429719\n", - " 22.335136\n", - " 82.506535\n", - " 318.241328\n", - " 1.262136e+03\n", - " 5.090911e+03\n", - " 2.073895e+04\n", - " 8.496629e+04\n", + " 模块\n", + " 2018-01-27 13:00:00\n", + " 4\n", + " 0.0\n", " \n", " \n", - " 1017\n", + " 119274\n", " #zh.wikipedia\n", - " Taiwan\n", - " User\n", - " 3.555348\n", - " 3.555348\n", - " 1.0\n", - " 3.555348\n", - " 12.640500\n", - " 44.941377\n", - " 159.782236\n", - " 5.680815e+02\n", - " 2.019727e+03\n", - " 7.180834e+03\n", - " 2.553036e+04\n", + " 模块\n", + " 2018-01-27 15:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 1018\n", + " 119275\n", " #zh.wikipedia\n", - " Taiwan\n", - " User talk\n", - " 6.028279\n", - " 6.028279\n", - " 1.0\n", - " 6.028279\n", - " 36.340142\n", - " 219.068497\n", - " 1320.605915\n", - " 7.960980e+03\n", - " 4.799101e+04\n", - " 2.893032e+05\n", - " 1.744000e+06\n", + " 模块讨论\n", + " 2018-01-12 07:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 1019\n", + " 119276\n", " #zh.wikipedia\n", - " Taiwan\n", - " Wikipedia\n", - " 5.843544\n", - " 7.059618\n", - " 7.0\n", - " 45.332490\n", - " 294.732410\n", - " 1923.808607\n", - " 12607.044189\n", - " 8.294237e+04\n", - " 5.478200e+05\n", - " 3.632255e+06\n", - " 2.417480e+07\n", + " 模块讨论\n", + " 2018-01-18 09:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 1020\n", - " #zh.wikipedia\n", - " United Kingdom\n", - " Main\n", - " 2.564949\n", - " 2.772589\n", - " 2.0\n", - " 5.337538\n", - " 14.266213\n", - " 38.188290\n", - " 102.376568\n", - " 2.748609e+02\n", - " 7.390245e+02\n", - " 1.989885e+03\n", - " 5.365475e+03\n", + " 119277\n", + " #zh.wikipedia\n", + " 模块讨论\n", + " 2018-01-22 05:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 1021\n", + " 119278\n", " #zh.wikipedia\n", - " United States\n", - " Main\n", - " 0.000000\n", - " 7.198931\n", - " 39.0\n", - " 129.906500\n", - " 595.110290\n", - " 3138.035261\n", - " 17972.964491\n", - " 1.084415e+05\n", - " 6.770740e+05\n", - " 4.328078e+06\n", - " 2.814110e+07\n", + " 模块讨论\n", + " 2018-01-23 10:00:00\n", + " 3\n", + " 0.0\n", " \n", " \n", - " 1022\n", + " 119279\n", " #zh.wikipedia\n", - " United States\n", - " Template\n", - " 2.995732\n", - " 2.995732\n", - " 1.0\n", - " 2.995732\n", - " 8.974412\n", - " 26.884935\n", - " 80.540068\n", - " 2.412765e+02\n", - " 7.227997e+02\n", - " 2.165315e+03\n", - " 6.486703e+03\n", + " 模块讨论\n", + " 2018-01-23 11:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", - " 1023\n", + " 119280\n", " #zh.wikipedia\n", - " Vietnam\n", - " Main\n", - " 2.079442\n", - " 5.283204\n", - " 5.0\n", - " 18.003488\n", - " 73.970915\n", - " 332.114026\n", - " 1572.094310\n", - " 7.672361e+03\n", - " 3.813046e+04\n", - " 1.916971e+05\n", - " 9.712955e+05\n", + " 模块讨论\n", + " 2018-01-23 21:00:00\n", + " 1\n", + " 0.0\n", + " \n", + " \n", + " 119281\n", + " #zh.wikipedia\n", + " 爆笑寵妃\n", + " 2018-01-23 09:00:00\n", + " 1\n", + " 0.0\n", + " \n", + " \n", + " 119282\n", + " #zh.wikipedia\n", + " 阿富汗\n", + " 2018-01-09 23:00:00\n", + " 1\n", + " 0.0\n", " \n", " \n", "\n", - "

1024 rows × 14 columns

\n", + "

119283 rows × 5 columns

\n", "" ], "text/plain": [ - " channel countryName namespace min \\\n", - "0 #ar.wikipedia Algeria Main 0.000000 \n", - "1 #ar.wikipedia Algeria نقاش 5.267858 \n", - "2 #ar.wikipedia Australia Main 5.564520 \n", - "3 #ar.wikipedia Bahrain Main 3.784190 \n", - "4 #ar.wikipedia Belgium Main 5.459586 \n", - "5 #ar.wikipedia Denmark ويكيبيديا 6.030685 \n", - "6 #ar.wikipedia Egypt Main 0.693147 \n", - "7 #ar.wikipedia Hashemite Kingdom of Jordan Main 0.693147 \n", - "8 #ar.wikipedia Hashemite Kingdom of Jordan نقاش المستخدم 4.127134 \n", - "9 #ar.wikipedia Iran Main 0.693147 \n", - "10 #ar.wikipedia Iraq Main 0.000000 \n", - "11 #ar.wikipedia Iraq نقاش 5.926926 \n", - "12 #ar.wikipedia Israel Main 0.000000 \n", - "13 #ar.wikipedia Kuwait Main 0.693147 \n", - "14 #ar.wikipedia Kuwait نقاش المستخدم 6.272877 \n", - "15 #ar.wikipedia Lebanon Main 5.236442 \n", - "16 #ar.wikipedia Libya Main 4.488636 \n", - "17 #ar.wikipedia Morocco Main 0.000000 \n", - "18 #ar.wikipedia Morocco بوابة 5.521461 \n", - "19 #ar.wikipedia Morocco قالب 4.382027 \n", - "20 #ar.wikipedia Morocco نقاش 6.666957 \n", - "21 #ar.wikipedia Morocco نقاش المستخدم 5.645447 \n", - "22 #ar.wikipedia Morocco ويكيبيديا 8.711937 \n", - "23 #ar.wikipedia Oman Main 1.386294 \n", - "24 #ar.wikipedia Palestine Main 0.000000 \n", - "25 #ar.wikipedia Qatar Main 2.197225 \n", - "26 #ar.wikipedia Saudi Arabia Main 0.000000 \n", - "27 #ar.wikipedia Saudi Arabia مستخدم 3.367296 \n", - "28 #ar.wikipedia Saudi Arabia ويكيبيديا 3.610918 \n", - "29 #ar.wikipedia Sudan Main 4.477337 \n", - "... ... ... ... ... \n", - "994 #zh.wikipedia China Main 1.098612 \n", - "995 #zh.wikipedia China Wikipedia 1.609438 \n", - "996 #zh.wikipedia Czech Republic Main 1.098612 \n", - "997 #zh.wikipedia Finland Main 3.332205 \n", - "998 #zh.wikipedia France Main 3.663562 \n", - "999 #zh.wikipedia Germany Main 3.135494 \n", - "1000 #zh.wikipedia Hong Kong Main 0.000000 \n", - "1001 #zh.wikipedia Hong Kong Template 2.079442 \n", - "1002 #zh.wikipedia Israel Main 6.265301 \n", - "1003 #zh.wikipedia Italy Main 5.247024 \n", - "1004 #zh.wikipedia Japan Main 0.000000 \n", - "1005 #zh.wikipedia Japan Talk 3.806662 \n", - "1006 #zh.wikipedia Macao Main 1.386294 \n", - "1007 #zh.wikipedia Macao Wikipedia 5.370638 \n", - "1008 #zh.wikipedia Malaysia Main 0.000000 \n", - "1009 #zh.wikipedia New Zealand Main 2.484907 \n", - "1010 #zh.wikipedia Portugal Main 2.484907 \n", - "1011 #zh.wikipedia Republic of Korea Main 3.091042 \n", - "1012 #zh.wikipedia Singapore Main 1.386294 \n", - "1013 #zh.wikipedia Taiwan File 0.693147 \n", - "1014 #zh.wikipedia Taiwan Main 0.000000 \n", - "1015 #zh.wikipedia Taiwan Talk 2.564949 \n", - "1016 #zh.wikipedia Taiwan Template 2.302585 \n", - "1017 #zh.wikipedia Taiwan User 3.555348 \n", - "1018 #zh.wikipedia Taiwan User talk 6.028279 \n", - "1019 #zh.wikipedia Taiwan Wikipedia 5.843544 \n", - "1020 #zh.wikipedia United Kingdom Main 2.564949 \n", - "1021 #zh.wikipedia United States Main 0.000000 \n", - "1022 #zh.wikipedia United States Template 2.995732 \n", - "1023 #zh.wikipedia Vietnam Main 2.079442 \n", - "\n", - " max m0 m1 m2 m3 m4 \\\n", - "0 6.666957 18.0 57.644747 234.884785 1108.007978 5842.083979 \n", - "1 5.267858 1.0 5.267858 27.750330 146.184800 770.080792 \n", - "2 5.564520 1.0 5.564520 30.963887 172.299183 958.762321 \n", - "3 3.784190 1.0 3.784190 14.320091 54.189941 205.065012 \n", - "4 5.459586 1.0 5.459586 29.807074 162.734269 888.461660 \n", - "5 6.030685 1.0 6.030685 36.369165 219.330986 1322.716142 \n", - "6 8.664060 28.0 124.659483 680.894125 4142.018720 26951.803294 \n", - "7 5.164786 8.0 19.517211 59.503557 216.944287 908.031706 \n", - "8 4.127134 1.0 4.127134 17.033238 70.298463 290.131205 \n", - "9 0.693147 1.0 0.693147 0.480453 0.333025 0.230835 \n", - "10 6.520621 19.0 61.367666 287.348181 1524.935621 8645.085403 \n", - "11 6.278521 2.0 12.205447 74.548283 455.701992 2787.931246 \n", - "12 5.442418 4.0 13.757495 64.504332 308.849582 1507.522267 \n", - "13 5.472271 9.0 27.385355 112.181294 525.630362 2608.214237 \n", - "14 6.272877 1.0 6.272877 39.348986 246.831349 1548.342694 \n", - "15 5.996452 2.0 11.232894 63.377762 359.201990 2044.811514 \n", - "16 7.472501 3.0 19.227964 128.792904 891.425647 6312.404206 \n", - "17 7.754053 13.0 45.491987 239.320305 1502.900431 10204.694809 \n", - "18 5.521461 1.0 5.521461 30.486531 168.330188 929.428552 \n", - "19 4.382027 1.0 4.382027 19.202157 84.144365 368.722850 \n", - "20 6.666957 1.0 6.666957 44.448313 296.334981 1975.652517 \n", - "21 5.645447 1.0 5.645447 31.871071 179.926437 1015.765146 \n", - "22 8.711937 1.0 8.711937 75.897851 661.217316 5760.483781 \n", - "23 3.806662 2.0 5.192957 16.412491 57.825323 213.673148 \n", - "24 3.465736 4.0 6.931472 24.022651 83.256163 288.543873 \n", - "25 4.804021 3.0 9.486152 34.081175 136.821624 594.057905 \n", - "26 8.311398 97.0 308.790738 1375.600910 7038.339584 39448.036322 \n", - "27 3.367296 1.0 3.367296 11.338681 38.180694 128.565692 \n", - "28 3.610918 1.0 3.610918 13.038728 47.081777 170.008432 \n", - "29 4.976734 2.0 9.454071 44.814424 213.018272 1015.311782 \n", - "... ... ... ... ... ... ... \n", - "994 6.689599 22.0 80.418326 342.517685 1597.834083 7929.112435 \n", - "995 6.045005 4.0 19.648868 111.067428 656.498970 3929.637204 \n", - "996 4.859812 3.0 9.569343 37.863454 163.185710 729.264530 \n", - "997 3.332205 1.0 3.332205 11.103587 36.999422 123.289642 \n", - "998 5.123964 2.0 8.787526 39.676691 183.700876 869.466985 \n", - "999 3.135494 1.0 3.135494 9.831324 30.826059 96.654931 \n", - "1000 9.431322 440.0 1541.199742 6864.103775 34431.390925 189571.354539 \n", - "1001 3.583519 2.0 5.662960 17.165685 55.009811 183.604539 \n", - "1002 6.265301 1.0 6.265301 39.253999 245.938129 1540.876460 \n", - "1003 5.247024 1.0 5.247024 27.531262 144.457192 757.970366 \n", - "1004 8.805225 12.0 51.071233 279.236525 1743.367154 12017.873604 \n", - "1005 3.806662 1.0 3.806662 14.490679 55.161125 209.979787 \n", - "1006 6.295266 12.0 48.543764 228.344849 1169.963335 6287.631418 \n", - "1007 5.370638 1.0 5.370638 28.843753 154.909356 831.962077 \n", - "1008 6.109248 45.0 127.018209 459.199656 1821.140937 7705.150344 \n", - "1009 3.951244 2.0 6.436150 21.787088 77.031814 281.872426 \n", - "1010 5.187386 2.0 7.672292 33.083733 154.930922 762.220421 \n", - "1011 3.091042 1.0 3.091042 9.554543 29.533499 91.289301 \n", - "1012 4.983607 12.0 32.615067 106.962952 399.085420 1608.995005 \n", - "1013 5.398163 3.0 7.189922 30.827563 158.962321 850.836518 \n", - "1014 8.348775 659.0 2209.913454 9248.880591 43555.882096 222371.087542 \n", - "1015 6.984716 8.0 33.299343 158.796546 848.436567 4912.187478 \n", - "1016 4.127134 2.0 6.429719 22.335136 82.506535 318.241328 \n", - "1017 3.555348 1.0 3.555348 12.640500 44.941377 159.782236 \n", - "1018 6.028279 1.0 6.028279 36.340142 219.068497 1320.605915 \n", - "1019 7.059618 7.0 45.332490 294.732410 1923.808607 12607.044189 \n", - "1020 2.772589 2.0 5.337538 14.266213 38.188290 102.376568 \n", - "1021 7.198931 39.0 129.906500 595.110290 3138.035261 17972.964491 \n", - "1022 2.995732 1.0 2.995732 8.974412 26.884935 80.540068 \n", - "1023 5.283204 5.0 18.003488 73.970915 332.114026 1572.094310 \n", - "\n", - " m5 m6 m7 m8 \n", - "0 3.325478e+04 1.988641e+05 1.226715e+06 7.717880e+06 \n", - "1 4.056676e+03 2.137000e+04 1.125741e+05 5.930244e+05 \n", - "2 5.335052e+03 2.968701e+04 1.651940e+05 9.192252e+05 \n", - "3 7.760049e+02 2.936550e+03 1.111246e+04 4.205166e+04 \n", - "4 4.850632e+03 2.648244e+04 1.445832e+05 7.893641e+05 \n", - "5 7.976885e+03 4.810608e+04 2.901126e+05 1.749578e+06 \n", - "6 1.839678e+05 1.302402e+06 9.492862e+06 7.087854e+07 \n", - "7 4.171502e+03 2.025181e+04 1.013195e+05 5.148902e+05 \n", - "8 1.197410e+03 4.941874e+03 2.039578e+04 8.417612e+04 \n", - "9 1.600027e-01 1.109054e-01 7.687378e-02 5.328484e-02 \n", - "10 5.101748e+04 3.091354e+05 1.907096e+06 1.191112e+07 \n", - "11 1.707021e+04 1.046042e+05 6.415184e+05 3.937453e+06 \n", - "12 7.485986e+03 3.773624e+04 1.926923e+05 9.947230e+05 \n", - "13 1.329461e+04 6.875724e+04 3.588217e+05 1.884294e+06 \n", - "14 9.712563e+03 6.092571e+04 3.821795e+05 2.397365e+06 \n", - "15 1.169018e+04 6.710735e+04 3.867371e+05 2.237002e+06 \n", - "16 4.538466e+04 3.295322e+05 2.407739e+06 1.766220e+07 \n", - "17 7.174515e+04 5.133805e+05 3.712545e+06 2.704703e+07 \n", - "18 5.131803e+03 2.833505e+04 1.564509e+05 8.638374e+05 \n", - "19 1.615753e+03 7.080274e+03 3.102595e+04 1.359565e+05 \n", - "20 1.317159e+04 8.781442e+04 5.854550e+05 3.903203e+06 \n", - "21 5.734448e+03 3.237352e+04 1.827630e+05 1.031779e+06 \n", - "22 5.018497e+04 4.372083e+05 3.808932e+06 3.318317e+07 \n", - "23 8.044423e+02 3.049848e+03 1.159256e+04 4.410515e+04 \n", - "24 1.000017e+03 3.465794e+03 1.201153e+04 4.162878e+04 \n", - "25 2.704686e+03 1.264015e+04 5.988422e+04 2.856838e+05 \n", - "26 2.366367e+05 1.497962e+06 9.907004e+06 6.793387e+07 \n", - "27 4.329187e+02 1.457765e+03 4.908727e+03 1.652914e+04 \n", - "28 6.138865e+02 2.216694e+03 8.004299e+03 2.890287e+04 \n", - "29 4.852247e+03 2.324979e+04 1.116849e+05 5.378129e+05 \n", - "... ... ... ... ... \n", - "994 4.128033e+04 2.237338e+05 1.255615e+06 7.263910e+06 \n", - "995 2.360291e+04 1.419026e+05 8.533656e+05 5.132418e+06 \n", - "996 3.326287e+03 1.539243e+04 7.202931e+04 3.400451e+05 \n", - "997 4.108263e+02 1.368957e+03 4.561646e+03 1.520034e+04 \n", - "998 4.192038e+03 2.051605e+04 1.015925e+05 5.076205e+05 \n", - "999 3.030610e+02 9.502459e+02 2.979491e+03 9.342176e+03 \n", - "1000 1.133382e+06 7.309490e+06 5.048145e+07 3.699213e+08 \n", - "1001 6.298276e+02 2.198520e+03 7.756832e+03 2.754389e+04 \n", - "1002 9.654055e+03 6.048556e+04 3.789603e+05 2.374300e+06 \n", - "1003 3.977089e+03 2.086788e+04 1.094943e+05 5.745191e+05 \n", - "1004 8.883793e+04 6.890841e+05 5.526233e+06 4.537615e+07 \n", - "1005 7.993222e+02 3.042750e+03 1.158272e+04 4.409151e+04 \n", - "1006 3.476625e+04 1.958443e+05 1.118023e+06 6.448414e+06 \n", - "1007 4.468167e+03 2.399691e+04 1.288787e+05 6.921609e+05 \n", - "1008 3.435583e+04 1.604540e+05 7.817490e+05 3.958450e+06 \n", - "1009 1.057839e+03 4.040852e+03 1.562117e+04 6.086522e+04 \n", - "1010 3.850892e+03 1.972002e+04 1.016591e+05 5.257640e+05 \n", - "1011 2.821791e+02 8.722276e+02 2.696093e+03 8.333736e+03 \n", - "1012 6.796709e+03 2.960535e+04 1.318929e+05 5.982321e+05 \n", - "1013 4.585605e+03 2.474621e+04 1.335760e+05 7.210561e+05 \n", - "1014 1.205020e+06 6.842703e+06 4.039094e+07 2.465605e+08 \n", - "1015 2.994611e+04 1.885966e+05 1.213369e+06 7.924473e+06 \n", - "1016 1.262136e+03 5.090911e+03 2.073895e+04 8.496629e+04 \n", - "1017 5.680815e+02 2.019727e+03 7.180834e+03 2.553036e+04 \n", - "1018 7.960980e+03 4.799101e+04 2.893032e+05 1.744000e+06 \n", - "1019 8.294237e+04 5.478200e+05 3.632255e+06 2.417480e+07 \n", - "1020 2.748609e+02 7.390245e+02 1.989885e+03 5.365475e+03 \n", - "1021 1.084415e+05 6.770740e+05 4.328078e+06 2.814110e+07 \n", - "1022 2.412765e+02 7.227997e+02 2.165315e+03 6.486703e+03 \n", - "1023 7.672361e+03 3.813046e+04 1.916971e+05 9.712955e+05 \n", + " channel namespace __time count outliers1\n", + "0 #ar.wikipedia 16 2018-01-15 05:00:00 1 0.0\n", + "1 #ar.wikipedia Main 2018-01-08 00:00:00 149 0.0\n", + "2 #ar.wikipedia Main 2018-01-08 01:00:00 185 0.0\n", + "3 #ar.wikipedia Main 2018-01-08 02:00:00 105 0.0\n", + "4 #ar.wikipedia Main 2018-01-08 03:00:00 64 0.0\n", + "5 #ar.wikipedia Main 2018-01-08 04:00:00 79 0.0\n", + "6 #ar.wikipedia Main 2018-01-08 05:00:00 63 0.0\n", + "7 #ar.wikipedia Main 2018-01-08 06:00:00 122 0.0\n", + "8 #ar.wikipedia Main 2018-01-08 07:00:00 185 0.0\n", + "9 #ar.wikipedia Main 2018-01-08 08:00:00 166 0.0\n", + "10 #ar.wikipedia Main 2018-01-08 09:00:00 118 0.0\n", + "11 #ar.wikipedia Main 2018-01-08 10:00:00 109 0.0\n", + "12 #ar.wikipedia Main 2018-01-08 11:00:00 148 0.0\n", + "13 #ar.wikipedia Main 2018-01-08 12:00:00 205 0.0\n", + "14 #ar.wikipedia Main 2018-01-08 13:00:00 216 0.0\n", + "15 #ar.wikipedia Main 2018-01-08 14:00:00 207 0.0\n", + "16 #ar.wikipedia Main 2018-01-08 15:00:00 165 0.0\n", + "17 #ar.wikipedia Main 2018-01-08 16:00:00 126 0.0\n", + "18 #ar.wikipedia Main 2018-01-08 17:00:00 187 0.0\n", + "19 #ar.wikipedia Main 2018-01-08 18:00:00 319 0.0\n", + "20 #ar.wikipedia Main 2018-01-08 19:00:00 169 0.0\n", + "21 #ar.wikipedia Main 2018-01-08 20:00:00 166 0.0\n", + "22 #ar.wikipedia Main 2018-01-08 21:00:00 166 0.0\n", + "23 #ar.wikipedia Main 2018-01-08 22:00:00 116 0.0\n", + "24 #ar.wikipedia Main 2018-01-08 23:00:00 133 0.0\n", + "25 #ar.wikipedia Main 2018-01-09 00:00:00 76 0.0\n", + "26 #ar.wikipedia Main 2018-01-09 01:00:00 75 0.0\n", + "27 #ar.wikipedia Main 2018-01-09 02:00:00 42 0.0\n", + "28 #ar.wikipedia Main 2018-01-09 03:00:00 38 0.0\n", + "29 #ar.wikipedia Main 2018-01-09 04:00:00 86 0.0\n", + "... ... ... ... ... ...\n", + "119253 #zh.wikipedia 模块 2018-01-24 04:00:00 1 0.0\n", + "119254 #zh.wikipedia 模块 2018-01-24 08:00:00 1 0.0\n", + "119255 #zh.wikipedia 模块 2018-01-24 13:00:00 2 0.0\n", + "119256 #zh.wikipedia 模块 2018-01-24 15:00:00 1 0.0\n", + "119257 #zh.wikipedia 模块 2018-01-25 00:00:00 1 0.0\n", + "119258 #zh.wikipedia 模块 2018-01-25 11:00:00 3 0.0\n", + "119259 #zh.wikipedia 模块 2018-01-25 12:00:00 1 0.0\n", + "119260 #zh.wikipedia 模块 2018-01-25 13:00:00 2 0.0\n", + "119261 #zh.wikipedia 模块 2018-01-25 14:00:00 3 0.0\n", + "119262 #zh.wikipedia 模块 2018-01-25 15:00:00 1 0.0\n", + "119263 #zh.wikipedia 模块 2018-01-25 18:00:00 1 0.0\n", + "119264 #zh.wikipedia 模块 2018-01-25 19:00:00 2 0.0\n", + "119265 #zh.wikipedia 模块 2018-01-25 20:00:00 2 0.0\n", + "119266 #zh.wikipedia 模块 2018-01-26 08:00:00 1 0.0\n", + "119267 #zh.wikipedia 模块 2018-01-26 09:00:00 2 0.0\n", + "119268 #zh.wikipedia 模块 2018-01-26 16:00:00 1 0.0\n", + "119269 #zh.wikipedia 模块 2018-01-26 17:00:00 3 0.0\n", + "119270 #zh.wikipedia 模块 2018-01-27 09:00:00 2 0.0\n", + "119271 #zh.wikipedia 模块 2018-01-27 11:00:00 2 0.0\n", + "119272 #zh.wikipedia 模块 2018-01-27 12:00:00 1 0.0\n", + "119273 #zh.wikipedia 模块 2018-01-27 13:00:00 4 0.0\n", + "119274 #zh.wikipedia 模块 2018-01-27 15:00:00 1 0.0\n", + "119275 #zh.wikipedia 模块讨论 2018-01-12 07:00:00 1 0.0\n", + "119276 #zh.wikipedia 模块讨论 2018-01-18 09:00:00 1 0.0\n", + "119277 #zh.wikipedia 模块讨论 2018-01-22 05:00:00 1 0.0\n", + "119278 #zh.wikipedia 模块讨论 2018-01-23 10:00:00 3 0.0\n", + "119279 #zh.wikipedia 模块讨论 2018-01-23 11:00:00 1 0.0\n", + "119280 #zh.wikipedia 模块讨论 2018-01-23 21:00:00 1 0.0\n", + "119281 #zh.wikipedia 爆笑寵妃 2018-01-23 09:00:00 1 0.0\n", + "119282 #zh.wikipedia 阿富汗 2018-01-09 23:00:00 1 0.0\n", "\n", - "[1024 rows x 14 columns]" + "[119283 rows x 5 columns]" + ] + }, + "execution_count": 217, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t1 = big_wiki_data[metric].quantile(0.99)\n", + "big_wiki_oracle = big_wiki_data.groupby(attributes + [pd.TimeGrouper(freq='H')]).agg({metric: [\n", + " 'count',\n", + " outliers(t1, \"1\"),\n", + "]}).reset_index(col_level=1)\n", + "big_wiki_oracle.columns = big_wiki_oracle.columns.get_level_values(1)\n", + "big_wiki_oracle" + ] + }, + { + "cell_type": "code", + "execution_count": 221, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "74235.0" ] }, - "execution_count": 151, + "execution_count": 221, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "wiki_cube = wiki_data.groupby(attributes).agg({metric: [\n", - " 'min',\n", - " 'max',\n", - " moment(0),\n", - " moment(1),\n", - " moment(2),\n", - " moment(3),\n", - " moment(4),\n", - " moment(5),\n", - " moment(6),\n", - " moment(7),\n", - " moment(8)\n", - "]}).reset_index(col_level=1)\n", - "wiki_cube.columns = wiki_cube.columns.get_level_values(1)\n", - "wiki_cube" + "big_wiki_oracle['outliers1'].sum()" ] }, { "cell_type": "code", - "execution_count": 152, + "execution_count": 218, "metadata": { "collapsed": true }, "outputs": [], "source": [ - "wiki_cube.to_csv('lib/src/test/resources/wiki_moments_cubed.csv')" + "big_wiki_oracle.to_csv('lib/src/test/resources/big_wiki_oracle_cubed.csv')" ] }, { "cell_type": "code", - "execution_count": 153, + "execution_count": 219, "metadata": {}, "outputs": [ { @@ -3416,314 +3727,686 @@ " \n", " \n", " channel\n", - " countryName\n", " namespace\n", - " count\n", - " outliers1\n", - " outliers5\n", - " outliers10\n", + " __time\n", + " min\n", + " max\n", + " lmin\n", + " lmax\n", + " m0\n", + " m1\n", + " m2\n", + " m3\n", + " m4\n", + " m5\n", + " lm0\n", + " lm1\n", + " lm2\n", + " lm3\n", + " lm4\n", + " lm5\n", " \n", " \n", " \n", " \n", " 0\n", " #ar.wikipedia\n", - " Algeria\n", - " Main\n", - " 18\n", - " 0.0\n", - " 0.0\n", - " 2.0\n", + " 16\n", + " 2018-01-15 05:00:00\n", + " 25.0\n", + " 25.0\n", + " 3.218876\n", + " 3.218876\n", + " 1.0\n", + " 25.0\n", + " 625.0\n", + " 1.562500e+04\n", + " 3.906250e+05\n", + " 9.765625e+06\n", + " 1.0\n", + " 3.218876\n", + " 10.361162\n", + " 33.351293\n", + " 107.353669\n", + " 3.455581e+02\n", " \n", " \n", " 1\n", " #ar.wikipedia\n", - " Algeria\n", - " نقاش\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " Main\n", + " 2018-01-08 00:00:00\n", + " 1.0\n", + " 9008.0\n", + " 0.000000\n", + " 9.105868\n", + " 149.0\n", + " 21663.0\n", + " 83984409.0\n", + " 7.333297e+11\n", + " 6.587312e+15\n", + " 5.931574e+19\n", + " 149.0\n", + " 623.865921\n", + " 2750.866322\n", + " 12573.995231\n", + " 60228.024755\n", + " 3.084326e+05\n", " \n", " \n", " 2\n", " #ar.wikipedia\n", - " Australia\n", " Main\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 2018-01-08 01:00:00\n", + " 2.0\n", + " 2211.0\n", + " 0.693147\n", + " 7.701200\n", + " 185.0\n", + " 22428.0\n", + " 17164924.0\n", + " 2.951661e+10\n", + " 5.669284e+13\n", + " 1.124865e+17\n", + " 185.0\n", + " 778.450524\n", + " 3439.849081\n", + " 15837.660630\n", + " 76250.085528\n", + " 3.871860e+05\n", " \n", " \n", " 3\n", " #ar.wikipedia\n", - " Bahrain\n", " Main\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 2018-01-08 02:00:00\n", + " 1.0\n", + " 2260.0\n", + " 0.000000\n", + " 7.723120\n", + " 105.0\n", + " 11471.0\n", + " 10752073.0\n", + " 2.150527e+10\n", + " 4.631962e+13\n", + " 1.011019e+17\n", + " 105.0\n", + " 409.865354\n", + " 1737.233189\n", + " 7792.116092\n", + " 37124.511576\n", + " 1.893863e+05\n", " \n", " \n", " 4\n", " #ar.wikipedia\n", - " Belgium\n", " Main\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 2018-01-08 03:00:00\n", + " 1.0\n", + " 5184.0\n", + " 0.000000\n", + " 8.553332\n", + " 64.0\n", + " 7686.0\n", + " 27016542.0\n", + " 1.393251e+11\n", + " 7.222052e+14\n", + " 3.743906e+18\n", + " 64.0\n", + " 222.911934\n", + " 854.601760\n", + " 3555.379528\n", + " 16586.848939\n", + " 8.970690e+04\n", " \n", " \n", " 5\n", " #ar.wikipedia\n", - " Denmark\n", - " ويكيبيديا\n", - " 1\n", - " 0.0\n", - " 0.0\n", + " Main\n", + " 2018-01-08 04:00:00\n", " 1.0\n", + " 1904.0\n", + " 0.000000\n", + " 7.551712\n", + " 79.0\n", + " 13132.0\n", + " 10292000.0\n", + " 1.520400e+10\n", + " 2.606315e+13\n", + " 4.699229e+16\n", + " 79.0\n", + " 339.670259\n", + " 1597.102698\n", + " 7916.110671\n", + " 41284.402101\n", + " 2.266564e+05\n", " \n", " \n", " 6\n", " #ar.wikipedia\n", - " Egypt\n", " Main\n", - " 28\n", - " 1.0\n", - " 4.0\n", - " 8.0\n", + " 2018-01-08 05:00:00\n", + " 3.0\n", + " 3720.0\n", + " 1.098612\n", + " 8.221479\n", + " 63.0\n", + " 22247.0\n", + " 38118603.0\n", + " 1.136075e+11\n", + " 3.783324e+14\n", + " 1.295618e+18\n", + " 63.0\n", + " 306.030236\n", + " 1616.485245\n", + " 9106.009984\n", + " 54137.728987\n", + " 3.373556e+05\n", " \n", " \n", " 7\n", " #ar.wikipedia\n", - " Hashemite Kingdom of Jordan\n", " Main\n", - " 8\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 2018-01-08 06:00:00\n", + " 1.0\n", + " 2296.0\n", + " 0.000000\n", + " 7.738924\n", + " 122.0\n", + " 25771.0\n", + " 19301921.0\n", + " 2.632441e+10\n", + " 4.700515e+13\n", + " 9.415283e+16\n", + " 122.0\n", + " 533.746768\n", + " 2632.325235\n", + " 13890.844091\n", + " 77252.101951\n", + " 4.486258e+05\n", " \n", " \n", " 8\n", " #ar.wikipedia\n", - " Hashemite Kingdom of Jordan\n", - " نقاش المستخدم\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " Main\n", + " 2018-01-08 07:00:00\n", + " 6.0\n", + " 2110.0\n", + " 1.791759\n", + " 7.654443\n", + " 185.0\n", + " 35555.0\n", + " 21997115.0\n", + " 2.728389e+10\n", + " 4.519400e+13\n", + " 8.302905e+16\n", + " 185.0\n", + " 847.318981\n", + " 4131.665057\n", + " 21252.017440\n", + " 114463.683242\n", + " 6.416142e+05\n", " \n", " \n", " 9\n", " #ar.wikipedia\n", - " Iran\n", " Main\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 2018-01-08 08:00:00\n", + " 4.0\n", + " 2244.0\n", + " 1.386294\n", + " 7.716015\n", + " 166.0\n", + " 32412.0\n", + " 19844876.0\n", + " 2.461746e+10\n", + " 4.288884e+13\n", + " 8.480738e+16\n", + " 166.0\n", + " 755.340678\n", + " 3693.919162\n", + " 19119.675276\n", + " 103717.481404\n", + " 5.852788e+05\n", " \n", " \n", " 10\n", " #ar.wikipedia\n", - " Iraq\n", " Main\n", - " 19\n", - " 0.0\n", - " 0.0\n", - " 4.0\n", + " 2018-01-08 09:00:00\n", + " 1.0\n", + " 9370.0\n", + " 0.000000\n", + " 9.145268\n", + " 118.0\n", + " 48319.0\n", + " 213810381.0\n", + " 1.489308e+12\n", + " 1.171073e+16\n", + " 9.747945e+19\n", + " 118.0\n", + " 505.935799\n", + " 2471.909237\n", + " 13561.437222\n", + " 82846.690914\n", + " 5.542719e+05\n", " \n", " \n", " 11\n", " #ar.wikipedia\n", - " Iraq\n", - " نقاش\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", + " Main\n", + " 2018-01-08 10:00:00\n", + " 2.0\n", + " 14400.0\n", + " 0.693147\n", + " 9.574983\n", + " 109.0\n", + " 38819.0\n", + " 234665775.0\n", + " 3.054160e+12\n", + " 4.322428e+16\n", + " 6.199923e+20\n", + " 109.0\n", + " 485.395483\n", + " 2407.659640\n", + " 13061.331524\n", + " 76704.563149\n", + " 4.838209e+05\n", " \n", " \n", " 12\n", " #ar.wikipedia\n", - " Israel\n", " Main\n", - " 4\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 2018-01-08 11:00:00\n", + " 1.0\n", + " 20861.0\n", + " 0.000000\n", + " 9.945637\n", + " 148.0\n", + " 59901.0\n", + " 545245471.0\n", + " 9.823238e+12\n", + " 1.954983e+17\n", + " 4.003327e+21\n", + " 148.0\n", + " 633.654016\n", + " 3107.065691\n", + " 16850.137822\n", + " 100333.756458\n", + " 6.513246e+05\n", " \n", " \n", " 13\n", " #ar.wikipedia\n", - " Kuwait\n", " Main\n", - " 9\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 2018-01-08 12:00:00\n", + " 1.0\n", + " 19615.0\n", + " 0.000000\n", + " 9.884050\n", + " 205.0\n", + " 69733.0\n", + " 453862027.0\n", + " 7.791436e+12\n", + " 1.492376e+17\n", + " 2.910235e+21\n", + " 205.0\n", + " 917.447350\n", + " 4610.700216\n", + " 25085.612638\n", + " 146173.142790\n", + " 9.073211e+05\n", " \n", " \n", " 14\n", " #ar.wikipedia\n", - " Kuwait\n", - " نقاش المستخدم\n", - " 1\n", - " 0.0\n", - " 0.0\n", + " Main\n", + " 2018-01-08 13:00:00\n", " 1.0\n", + " 20813.0\n", + " 0.000000\n", + " 9.943333\n", + " 216.0\n", + " 91332.0\n", + " 764243202.0\n", + " 1.165487e+13\n", + " 2.099017e+17\n", + " 4.096753e+21\n", + " 216.0\n", + " 957.165239\n", + " 4692.063959\n", + " 25194.242050\n", + " 148637.342307\n", + " 9.627241e+05\n", " \n", " \n", " 15\n", " #ar.wikipedia\n", - " Lebanon\n", " Main\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 2018-01-08 14:00:00\n", + " 1.0\n", + " 24895.0\n", + " 0.000000\n", + " 10.122422\n", + " 207.0\n", + " 98639.0\n", + " 828905321.0\n", + " 1.662305e+13\n", + " 3.926093e+17\n", + " 9.629692e+21\n", + " 207.0\n", + " 967.286478\n", + " 4983.871534\n", + " 27972.605325\n", + " 170348.071088\n", + " 1.118231e+06\n", " \n", " \n", " 16\n", " #ar.wikipedia\n", - " Libya\n", " Main\n", - " 3\n", - " 0.0\n", - " 2.0\n", - " 2.0\n", + " 2018-01-08 15:00:00\n", + " 1.0\n", + " 2758.0\n", + " 0.000000\n", + " 7.922261\n", + " 165.0\n", + " 11762.0\n", + " 11136476.0\n", + " 2.558908e+10\n", + " 6.514402e+13\n", + " 1.714289e+17\n", + " 165.0\n", + " 561.789354\n", + " 2125.120317\n", + " 8656.205080\n", + " 38185.491863\n", + " 1.846790e+05\n", " \n", " \n", " 17\n", " #ar.wikipedia\n", - " Morocco\n", " Main\n", - " 13\n", - " 0.0\n", - " 2.0\n", - " 4.0\n", + " 2018-01-08 16:00:00\n", + " 5.0\n", + " 5824.0\n", + " 1.609438\n", + " 8.669743\n", + " 126.0\n", + " 29981.0\n", + " 88989143.0\n", + " 4.395026e+11\n", + " 2.415290e+15\n", + " 1.372286e+19\n", + " 126.0\n", + " 507.100169\n", + " 2243.439188\n", + " 11084.672780\n", + " 61604.603177\n", + " 3.816690e+05\n", " \n", " \n", " 18\n", " #ar.wikipedia\n", - " Morocco\n", - " بوابة\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " Main\n", + " 2018-01-08 17:00:00\n", + " 1.0\n", + " 9981.0\n", + " 0.000000\n", + " 9.208439\n", + " 187.0\n", + " 36781.0\n", + " 134903627.0\n", + " 1.066277e+12\n", + " 1.007978e+16\n", + " 9.939758e+19\n", + " 187.0\n", + " 710.183006\n", + " 3039.636619\n", + " 14510.236033\n", + " 77742.351888\n", + " 4.661678e+05\n", " \n", " \n", " 19\n", " #ar.wikipedia\n", - " Morocco\n", - " قالب\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " Main\n", + " 2018-01-08 18:00:00\n", + " 1.0\n", + " 9299.0\n", + " 0.000000\n", + " 9.137662\n", + " 319.0\n", + " 56324.0\n", + " 180817952.0\n", + " 1.160272e+12\n", + " 9.029371e+15\n", + " 7.672157e+19\n", + " 319.0\n", + " 1189.529105\n", + " 5003.134721\n", + " 23442.962217\n", + " 123315.694340\n", + " 7.269031e+05\n", " \n", " \n", " 20\n", " #ar.wikipedia\n", - " Morocco\n", - " نقاش\n", - " 1\n", - " 0.0\n", - " 0.0\n", + " Main\n", + " 2018-01-08 19:00:00\n", " 1.0\n", + " 12493.0\n", + " 0.000000\n", + " 9.432924\n", + " 169.0\n", + " 36030.0\n", + " 189039444.0\n", + " 2.029805e+12\n", + " 2.457818e+16\n", + " 3.049638e+20\n", + " 169.0\n", + " 641.152635\n", + " 2752.672525\n", + " 13162.823765\n", + " 70715.579455\n", + " 4.268993e+05\n", " \n", " \n", " 21\n", " #ar.wikipedia\n", - " Morocco\n", - " نقاش المستخدم\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " Main\n", + " 2018-01-08 20:00:00\n", + " 1.0\n", + " 2868.0\n", + " 0.000000\n", + " 7.961370\n", + " 166.0\n", + " 27412.0\n", + " 30194820.0\n", + " 5.825853e+10\n", + " 1.326486e+14\n", + " 3.273461e+17\n", + " 166.0\n", + " 669.355818\n", + " 2995.182800\n", + " 14509.811177\n", + " 76283.256699\n", + " 4.338066e+05\n", " \n", " \n", " 22\n", " #ar.wikipedia\n", - " Morocco\n", - " ويكيبيديا\n", - " 1\n", - " 1.0\n", - " 1.0\n", + " Main\n", + " 2018-01-08 21:00:00\n", " 1.0\n", + " 9831.0\n", + " 0.000000\n", + " 9.193296\n", + " 166.0\n", + " 81943.0\n", + " 351898425.0\n", + " 2.292421e+12\n", + " 1.814749e+16\n", + " 1.579637e+20\n", + " 166.0\n", + " 693.088089\n", + " 3395.679190\n", + " 19308.399786\n", + " 124886.042468\n", + " 8.875766e+05\n", " \n", " \n", " 23\n", " #ar.wikipedia\n", - " Oman\n", " Main\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 2018-01-08 22:00:00\n", + " 1.0\n", + " 4344.0\n", + " 0.000000\n", + " 8.376551\n", + " 116.0\n", + " 41030.0\n", + " 94661544.0\n", + " 3.027321e+11\n", + " 1.094226e+15\n", + " 4.217602e+18\n", + " 116.0\n", + " 471.483026\n", + " 2320.786841\n", + " 12923.651010\n", + " 79754.002744\n", + " 5.325321e+05\n", " \n", " \n", " 24\n", " #ar.wikipedia\n", - " Palestine\n", " Main\n", - " 4\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 2018-01-08 23:00:00\n", + " 1.0\n", + " 4500.0\n", + " 0.000000\n", + " 8.411833\n", + " 133.0\n", + " 34747.0\n", + " 66006241.0\n", + " 2.072867e+11\n", + " 7.757698e+14\n", + " 3.141152e+18\n", + " 133.0\n", + " 551.993346\n", + " 2618.555596\n", + " 13703.861938\n", + " 78719.835498\n", + " 4.903965e+05\n", " \n", " \n", " 25\n", " #ar.wikipedia\n", - " Qatar\n", " Main\n", - " 3\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 2018-01-09 00:00:00\n", + " 1.0\n", + " 7474.0\n", + " 0.000000\n", + " 8.919186\n", + " 76.0\n", + " 35724.0\n", + " 116030450.0\n", + " 6.091646e+11\n", + " 3.818369e+15\n", + " 2.606292e+19\n", + " 76.0\n", + " 335.035936\n", + " 1753.130422\n", + " 10269.091366\n", + " 65447.315751\n", + " 4.447135e+05\n", " \n", " \n", " 26\n", " #ar.wikipedia\n", - " Saudi Arabia\n", " Main\n", - " 97\n", - " 0.0\n", - " 3.0\n", - " 8.0\n", - " \n", - " \n", - " 27\n", - " #ar.wikipedia\n", - " Saudi Arabia\n", - " مستخدم\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 2018-01-09 01:00:00\n", + " 1.0\n", + " 2811.0\n", + " 0.000000\n", + " 7.941296\n", + " 75.0\n", + " 11939.0\n", + " 18132095.0\n", + " 4.276567e+10\n", + " 1.072078e+14\n", + " 2.762855e+17\n", + " 75.0\n", + " 289.623485\n", + " 1250.400635\n", + " 5902.212860\n", + " 30618.199278\n", + " 1.747318e+05\n", + " \n", + " \n", + " 27\n", + " #ar.wikipedia\n", + " Main\n", + " 2018-01-09 02:00:00\n", + " 2.0\n", + " 702.0\n", + " 0.693147\n", + " 6.553933\n", + " 42.0\n", + " 3056.0\n", + " 964256.0\n", + " 5.605717e+08\n", + " 3.626704e+11\n", + " 2.399057e+14\n", + " 42.0\n", + " 146.596532\n", + " 582.824468\n", + " 2498.180334\n", + " 11387.289389\n", + " 5.507752e+04\n", " \n", " \n", " 28\n", " #ar.wikipedia\n", - " Saudi Arabia\n", - " ويكيبيديا\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " Main\n", + " 2018-01-09 03:00:00\n", + " 1.0\n", + " 1299.0\n", + " 0.000000\n", + " 7.169350\n", + " 38.0\n", + " 3930.0\n", + " 1942072.0\n", + " 2.222922e+09\n", + " 2.851957e+12\n", + " 3.699472e+15\n", + " 38.0\n", + " 151.273501\n", + " 665.242663\n", + " 3040.736624\n", + " 14415.383172\n", + " 7.132009e+04\n", " \n", " \n", " 29\n", " #ar.wikipedia\n", - " Sudan\n", " Main\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 2018-01-09 04:00:00\n", + " 2.0\n", + " 447.0\n", + " 0.693147\n", + " 6.102559\n", + " 86.0\n", + " 5300.0\n", + " 532602.0\n", + " 1.162811e+08\n", + " 4.249718e+10\n", + " 1.812852e+13\n", + " 86.0\n", + " 337.751266\n", + " 1368.923662\n", + " 5647.867090\n", + " 23665.550578\n", + " 1.008164e+05\n", " \n", " \n", " ...\n", @@ -3734,480 +4417,978 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 994\n", + " 119253\n", " #zh.wikipedia\n", - " China\n", - " Main\n", - " 22\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-24 04:00:00\n", + " 1.0\n", + " 1.0\n", + " 0.000000\n", + " 0.000000\n", + " 1.0\n", + " 1.0\n", " 1.0\n", + " 1.000000e+00\n", + " 1.000000e+00\n", + " 1.000000e+00\n", + " 1.0\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000\n", + " 0.000000e+00\n", " \n", " \n", - " 995\n", + " 119254\n", " #zh.wikipedia\n", - " China\n", - " Wikipedia\n", - " 4\n", - " 0.0\n", - " 0.0\n", - " 2.0\n", + " 模块\n", + " 2018-01-24 08:00:00\n", + " 6.0\n", + " 6.0\n", + " 1.791759\n", + " 1.791759\n", + " 1.0\n", + " 6.0\n", + " 36.0\n", + " 2.160000e+02\n", + " 1.296000e+03\n", + " 7.776000e+03\n", + " 1.0\n", + " 1.791759\n", + " 3.210402\n", + " 5.752268\n", + " 10.306681\n", + " 1.846709e+01\n", " \n", " \n", - " 996\n", + " 119255\n", " #zh.wikipedia\n", - " Czech Republic\n", - " Main\n", - " 3\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-24 13:00:00\n", + " 9.0\n", + " 6132.0\n", + " 2.197225\n", + " 8.721276\n", + " 2.0\n", + " 6141.0\n", + " 37601505.0\n", + " 2.305719e+11\n", + " 1.413867e+15\n", + " 8.669833e+18\n", + " 2.0\n", + " 10.918501\n", + " 80.888455\n", + " 673.953772\n", + " 5808.531499\n", + " 5.050575e+04\n", " \n", " \n", - " 997\n", + " 119256\n", " #zh.wikipedia\n", - " Finland\n", - " Main\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-24 15:00:00\n", + " 28.0\n", + " 28.0\n", + " 3.332205\n", + " 3.332205\n", + " 1.0\n", + " 28.0\n", + " 784.0\n", + " 2.195200e+04\n", + " 6.146560e+05\n", + " 1.721037e+07\n", + " 1.0\n", + " 3.332205\n", + " 11.103587\n", + " 36.999422\n", + " 123.289642\n", + " 4.108263e+02\n", " \n", " \n", - " 998\n", + " 119257\n", " #zh.wikipedia\n", - " France\n", - " Main\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-25 00:00:00\n", + " 15.0\n", + " 15.0\n", + " 2.708050\n", + " 2.708050\n", + " 1.0\n", + " 15.0\n", + " 225.0\n", + " 3.375000e+03\n", + " 5.062500e+04\n", + " 7.593750e+05\n", + " 1.0\n", + " 2.708050\n", + " 7.333536\n", + " 19.859583\n", + " 53.780749\n", + " 1.456410e+02\n", " \n", " \n", - " 999\n", + " 119258\n", " #zh.wikipedia\n", - " Germany\n", - " Main\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-25 11:00:00\n", + " 15.0\n", + " 298.0\n", + " 2.708050\n", + " 5.697093\n", + " 3.0\n", + " 431.0\n", + " 102953.0\n", + " 2.811000e+07\n", + " 8.080079e+09\n", + " 2.372951e+12\n", + " 3.0\n", + " 13.175828\n", + " 62.549842\n", + " 313.347501\n", + " 1625.221166\n", + " 8.618412e+03\n", " \n", " \n", - " 1000\n", + " 119259\n", " #zh.wikipedia\n", - " Hong Kong\n", - " Main\n", - " 440\n", + " 模块\n", + " 2018-01-25 12:00:00\n", " 2.0\n", - " 14.0\n", - " 22.0\n", + " 2.0\n", + " 0.693147\n", + " 0.693147\n", + " 1.0\n", + " 2.0\n", + " 4.0\n", + " 8.000000e+00\n", + " 1.600000e+01\n", + " 3.200000e+01\n", + " 1.0\n", + " 0.693147\n", + " 0.480453\n", + " 0.333025\n", + " 0.230835\n", + " 1.600027e-01\n", " \n", " \n", - " 1001\n", + " 119260\n", " #zh.wikipedia\n", - " Hong Kong\n", - " Template\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-25 13:00:00\n", + " 4.0\n", + " 5020.0\n", + " 1.386294\n", + " 8.521185\n", + " 2.0\n", + " 5024.0\n", + " 25200416.0\n", + " 1.265060e+11\n", + " 6.350602e+14\n", + " 3.188002e+18\n", + " 2.0\n", + " 9.907480\n", + " 74.532409\n", + " 621.392546\n", + " 5275.992221\n", + " 4.493136e+04\n", " \n", " \n", - " 1002\n", + " 119261\n", " #zh.wikipedia\n", - " Israel\n", - " Main\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", + " 模块\n", + " 2018-01-25 14:00:00\n", + " 2.0\n", + " 47.0\n", + " 0.693147\n", + " 3.850148\n", + " 3.0\n", + " 53.0\n", + " 2229.0\n", + " 1.038950e+05\n", + " 4.879953e+06\n", + " 2.293461e+08\n", + " 3.0\n", + " 5.929589\n", + " 17.225902\n", + " 60.070411\n", + " 223.664397\n", + " 8.513123e+02\n", " \n", " \n", - " 1003\n", + " 119262\n", " #zh.wikipedia\n", - " Italy\n", - " Main\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-25 15:00:00\n", + " 49.0\n", + " 49.0\n", + " 3.891820\n", + " 3.891820\n", + " 1.0\n", + " 49.0\n", + " 2401.0\n", + " 1.176490e+05\n", + " 5.764801e+06\n", + " 2.824752e+08\n", + " 1.0\n", + " 3.891820\n", + " 15.146265\n", + " 58.946542\n", + " 229.409351\n", + " 8.928200e+02\n", " \n", " \n", - " 1004\n", + " 119263\n", " #zh.wikipedia\n", - " Japan\n", - " Main\n", - " 12\n", + " 模块\n", + " 2018-01-25 18:00:00\n", + " 49.0\n", + " 49.0\n", + " 3.891820\n", + " 3.891820\n", " 1.0\n", - " 2.0\n", - " 3.0\n", + " 49.0\n", + " 2401.0\n", + " 1.176490e+05\n", + " 5.764801e+06\n", + " 2.824752e+08\n", + " 1.0\n", + " 3.891820\n", + " 15.146265\n", + " 58.946542\n", + " 229.409351\n", + " 8.928200e+02\n", " \n", " \n", - " 1005\n", + " 119264\n", " #zh.wikipedia\n", - " Japan\n", - " Talk\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-25 19:00:00\n", + " 42.0\n", + " 84.0\n", + " 3.737670\n", + " 4.430817\n", + " 2.0\n", + " 126.0\n", + " 8820.0\n", + " 6.667920e+05\n", + " 5.289883e+07\n", + " 4.312811e+09\n", + " 2.0\n", + " 8.168486\n", + " 33.602312\n", + " 139.202300\n", + " 580.586590\n", + " 2.437194e+03\n", " \n", " \n", - " 1006\n", + " 119265\n", " #zh.wikipedia\n", - " Macao\n", - " Main\n", - " 12\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", + " 模块\n", + " 2018-01-25 20:00:00\n", + " 22.0\n", + " 173.0\n", + " 3.091042\n", + " 5.153292\n", + " 2.0\n", + " 195.0\n", + " 30413.0\n", + " 5.188365e+06\n", + " 8.959793e+08\n", + " 1.549690e+11\n", + " 2.0\n", + " 8.244334\n", + " 36.110958\n", + " 166.386446\n", + " 796.532439\n", + " 3.916503e+03\n", " \n", " \n", - " 1007\n", + " 119266\n", " #zh.wikipedia\n", - " Macao\n", - " Wikipedia\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-26 08:00:00\n", + " 70.0\n", + " 70.0\n", + " 4.248495\n", + " 4.248495\n", + " 1.0\n", + " 70.0\n", + " 4900.0\n", + " 3.430000e+05\n", + " 2.401000e+07\n", + " 1.680700e+09\n", + " 1.0\n", + " 4.248495\n", + " 18.049712\n", + " 76.684115\n", + " 325.792097\n", + " 1.384126e+03\n", " \n", " \n", - " 1008\n", + " 119267\n", " #zh.wikipedia\n", - " Malaysia\n", - " Main\n", - " 45\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-26 09:00:00\n", " 1.0\n", + " 18.0\n", + " 0.000000\n", + " 2.890372\n", + " 2.0\n", + " 19.0\n", + " 325.0\n", + " 5.833000e+03\n", + " 1.049770e+05\n", + " 1.889569e+06\n", + " 2.0\n", + " 2.890372\n", + " 8.354249\n", + " 24.146885\n", + " 69.793475\n", + " 2.017291e+02\n", " \n", " \n", - " 1009\n", + " 119268\n", " #zh.wikipedia\n", - " New Zealand\n", - " Main\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-26 16:00:00\n", + " 1799.0\n", + " 1799.0\n", + " 7.494986\n", + " 7.494986\n", + " 1.0\n", + " 1799.0\n", + " 3236401.0\n", + " 5.822285e+09\n", + " 1.047429e+13\n", + " 1.884325e+16\n", + " 1.0\n", + " 7.494986\n", + " 56.174819\n", + " 421.029492\n", + " 3155.610250\n", + " 2.365126e+04\n", " \n", " \n", - " 1010\n", + " 119269\n", " #zh.wikipedia\n", - " Portugal\n", - " Main\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-26 17:00:00\n", + " 28.0\n", + " 3574.0\n", + " 3.332205\n", + " 8.181441\n", + " 3.0\n", + " 7016.0\n", + " 24429656.0\n", + " 8.544395e+10\n", + " 2.990099e+14\n", + " 1.046926e+18\n", + " 3.0\n", + " 19.649285\n", + " 144.228195\n", + " 1123.119020\n", + " 8984.649590\n", + " 7.270887e+04\n", " \n", " \n", - " 1011\n", + " 119270\n", " #zh.wikipedia\n", - " Republic of Korea\n", - " Main\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-27 09:00:00\n", + " 191.0\n", + " 8720.0\n", + " 5.252273\n", + " 9.073375\n", + " 2.0\n", + " 8911.0\n", + " 76074881.0\n", + " 6.630618e+11\n", + " 5.781840e+15\n", + " 5.041763e+19\n", + " 2.0\n", + " 14.325648\n", + " 109.912501\n", + " 891.866956\n", + " 7538.599028\n", + " 6.549264e+04\n", " \n", " \n", - " 1012\n", + " 119271\n", " #zh.wikipedia\n", - " Singapore\n", - " Main\n", - " 12\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块\n", + " 2018-01-27 11:00:00\n", + " 130.0\n", + " 191.0\n", + " 4.867534\n", + " 5.252273\n", + " 2.0\n", + " 321.0\n", + " 53381.0\n", + " 9.164871e+06\n", + " 1.616473e+09\n", + " 2.913242e+11\n", + " 2.0\n", + " 10.119808\n", + " 51.279268\n", + " 260.217157\n", + " 1322.361263\n", + " 6.729429e+03\n", " \n", - " \n", - " 1013\n", - " #zh.wikipedia\n", - " Taiwan\n", - " File\n", - " 3\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " \n", + " 119272\n", + " #zh.wikipedia\n", + " 模块\n", + " 2018-01-27 12:00:00\n", + " 2548.0\n", + " 2548.0\n", + " 7.843064\n", + " 7.843064\n", + " 1.0\n", + " 2548.0\n", + " 6492304.0\n", + " 1.654239e+10\n", + " 4.215001e+13\n", + " 1.073982e+17\n", + " 1.0\n", + " 7.843064\n", + " 61.513653\n", + " 482.455520\n", + " 3783.929526\n", + " 2.967760e+04\n", " \n", " \n", - " 1014\n", + " 119273\n", " #zh.wikipedia\n", - " Taiwan\n", - " Main\n", - " 659\n", - " 0.0\n", - " 6.0\n", - " 39.0\n", + " 模块\n", + " 2018-01-27 13:00:00\n", + " 12.0\n", + " 1146.0\n", + " 2.484907\n", + " 7.044033\n", + " 4.0\n", + " 1244.0\n", + " 1317608.0\n", + " 1.505279e+09\n", + " 1.724811e+12\n", + " 1.976620e+15\n", + " 4.0\n", + " 16.921587\n", + " 83.383945\n", + " 468.802091\n", + " 2895.230560\n", + " 1.895162e+04\n", " \n", " \n", - " 1015\n", + " 119274\n", " #zh.wikipedia\n", - " Taiwan\n", - " Talk\n", - " 8\n", - " 0.0\n", + " 模块\n", + " 2018-01-27 15:00:00\n", + " 88.0\n", + " 88.0\n", + " 4.477337\n", + " 4.477337\n", " 1.0\n", + " 88.0\n", + " 7744.0\n", + " 6.814720e+05\n", + " 5.996954e+07\n", + " 5.277319e+09\n", " 1.0\n", + " 4.477337\n", + " 20.046545\n", + " 89.755134\n", + " 401.863964\n", + " 1.799280e+03\n", " \n", " \n", - " 1016\n", + " 119275\n", " #zh.wikipedia\n", - " Taiwan\n", - " Template\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块讨论\n", + " 2018-01-12 07:00:00\n", + " 438.0\n", + " 438.0\n", + " 6.082219\n", + " 6.082219\n", + " 1.0\n", + " 438.0\n", + " 191844.0\n", + " 8.402767e+07\n", + " 3.680412e+10\n", + " 1.612020e+13\n", + " 1.0\n", + " 6.082219\n", + " 36.993387\n", + " 225.001877\n", + " 1368.510672\n", + " 8.323581e+03\n", " \n", " \n", - " 1017\n", + " 119276\n", " #zh.wikipedia\n", - " Taiwan\n", - " User\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块讨论\n", + " 2018-01-18 09:00:00\n", + " 96.0\n", + " 96.0\n", + " 4.564348\n", + " 4.564348\n", + " 1.0\n", + " 96.0\n", + " 9216.0\n", + " 8.847360e+05\n", + " 8.493466e+07\n", + " 8.153727e+09\n", + " 1.0\n", + " 4.564348\n", + " 20.833274\n", + " 95.090318\n", + " 434.025323\n", + " 1.981043e+03\n", " \n", " \n", - " 1018\n", + " 119277\n", " #zh.wikipedia\n", - " Taiwan\n", - " User talk\n", - " 1\n", - " 0.0\n", - " 0.0\n", + " 模块讨论\n", + " 2018-01-22 05:00:00\n", + " 26497.0\n", + " 26497.0\n", + " 10.184787\n", + " 10.184787\n", + " 1.0\n", + " 26497.0\n", + " 702091009.0\n", + " 1.860331e+13\n", + " 4.929318e+17\n", + " 1.306121e+22\n", " 1.0\n", + " 10.184787\n", + " 103.729882\n", + " 1056.466734\n", + " 10759.888445\n", + " 1.095872e+05\n", " \n", " \n", - " 1019\n", + " 119278\n", " #zh.wikipedia\n", - " Taiwan\n", - " Wikipedia\n", - " 7\n", - " 0.0\n", - " 2.0\n", + " 模块讨论\n", + " 2018-01-23 10:00:00\n", " 6.0\n", + " 427.0\n", + " 1.791759\n", + " 6.056784\n", + " 3.0\n", + " 632.0\n", + " 221966.0\n", + " 8.573530e+07\n", + " 3.481210e+10\n", + " 1.450721e+13\n", + " 3.0\n", + " 13.141848\n", + " 67.914111\n", + " 376.256674\n", + " 2141.137567\n", + " 1.232507e+04\n", " \n", " \n", - " 1020\n", + " 119279\n", " #zh.wikipedia\n", - " United Kingdom\n", - " Main\n", - " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 模块讨论\n", + " 2018-01-23 11:00:00\n", + " 185.0\n", + " 185.0\n", + " 5.220356\n", + " 5.220356\n", + " 1.0\n", + " 185.0\n", + " 34225.0\n", + " 6.331625e+06\n", + " 1.171351e+09\n", + " 2.166999e+11\n", + " 1.0\n", + " 5.220356\n", + " 27.252115\n", + " 142.265737\n", + " 742.677769\n", + " 3.877042e+03\n", " \n", " \n", - " 1021\n", + " 119280\n", " #zh.wikipedia\n", - " United States\n", - " Main\n", - " 39\n", - " 0.0\n", + " 模块讨论\n", + " 2018-01-23 21:00:00\n", + " 137.0\n", + " 137.0\n", + " 4.919981\n", + " 4.919981\n", " 1.0\n", - " 5.0\n", + " 137.0\n", + " 18769.0\n", + " 2.571353e+06\n", + " 3.522754e+08\n", + " 4.826172e+10\n", + " 1.0\n", + " 4.919981\n", + " 24.206212\n", + " 119.094103\n", + " 585.940714\n", + " 2.882817e+03\n", " \n", " \n", - " 1022\n", + " 119281\n", " #zh.wikipedia\n", - " United States\n", - " Template\n", - " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 爆笑寵妃\n", + " 2018-01-23 09:00:00\n", + " 59.0\n", + " 59.0\n", + " 4.077537\n", + " 4.077537\n", + " 1.0\n", + " 59.0\n", + " 3481.0\n", + " 2.053790e+05\n", + " 1.211736e+07\n", + " 7.149243e+08\n", + " 1.0\n", + " 4.077537\n", + " 16.626312\n", + " 67.794408\n", + " 276.434238\n", + " 1.127171e+03\n", " \n", " \n", - " 1023\n", + " 119282\n", " #zh.wikipedia\n", - " Vietnam\n", - " Main\n", - " 5\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 阿富汗\n", + " 2018-01-09 23:00:00\n", + " 28.0\n", + " 28.0\n", + " 3.332205\n", + " 3.332205\n", + " 1.0\n", + " 28.0\n", + " 784.0\n", + " 2.195200e+04\n", + " 6.146560e+05\n", + " 1.721037e+07\n", + " 1.0\n", + " 3.332205\n", + " 11.103587\n", + " 36.999422\n", + " 123.289642\n", + " 4.108263e+02\n", " \n", " \n", "\n", - "

1024 rows × 7 columns

\n", + "

119283 rows × 19 columns

\n", "" ], "text/plain": [ - " channel countryName namespace count \\\n", - "0 #ar.wikipedia Algeria Main 18 \n", - "1 #ar.wikipedia Algeria نقاش 1 \n", - "2 #ar.wikipedia Australia Main 1 \n", - "3 #ar.wikipedia Bahrain Main 1 \n", - "4 #ar.wikipedia Belgium Main 1 \n", - "5 #ar.wikipedia Denmark ويكيبيديا 1 \n", - "6 #ar.wikipedia Egypt Main 28 \n", - "7 #ar.wikipedia Hashemite Kingdom of Jordan Main 8 \n", - "8 #ar.wikipedia Hashemite Kingdom of Jordan نقاش المستخدم 1 \n", - "9 #ar.wikipedia Iran Main 1 \n", - "10 #ar.wikipedia Iraq Main 19 \n", - "11 #ar.wikipedia Iraq نقاش 2 \n", - "12 #ar.wikipedia Israel Main 4 \n", - "13 #ar.wikipedia Kuwait Main 9 \n", - "14 #ar.wikipedia Kuwait نقاش المستخدم 1 \n", - "15 #ar.wikipedia Lebanon Main 2 \n", - "16 #ar.wikipedia Libya Main 3 \n", - "17 #ar.wikipedia Morocco Main 13 \n", - "18 #ar.wikipedia Morocco بوابة 1 \n", - "19 #ar.wikipedia Morocco قالب 1 \n", - "20 #ar.wikipedia Morocco نقاش 1 \n", - "21 #ar.wikipedia Morocco نقاش المستخدم 1 \n", - "22 #ar.wikipedia Morocco ويكيبيديا 1 \n", - "23 #ar.wikipedia Oman Main 2 \n", - "24 #ar.wikipedia Palestine Main 4 \n", - "25 #ar.wikipedia Qatar Main 3 \n", - "26 #ar.wikipedia Saudi Arabia Main 97 \n", - "27 #ar.wikipedia Saudi Arabia مستخدم 1 \n", - "28 #ar.wikipedia Saudi Arabia ويكيبيديا 1 \n", - "29 #ar.wikipedia Sudan Main 2 \n", - "... ... ... ... ... \n", - "994 #zh.wikipedia China Main 22 \n", - "995 #zh.wikipedia China Wikipedia 4 \n", - "996 #zh.wikipedia Czech Republic Main 3 \n", - "997 #zh.wikipedia Finland Main 1 \n", - "998 #zh.wikipedia France Main 2 \n", - "999 #zh.wikipedia Germany Main 1 \n", - "1000 #zh.wikipedia Hong Kong Main 440 \n", - "1001 #zh.wikipedia Hong Kong Template 2 \n", - "1002 #zh.wikipedia Israel Main 1 \n", - "1003 #zh.wikipedia Italy Main 1 \n", - "1004 #zh.wikipedia Japan Main 12 \n", - "1005 #zh.wikipedia Japan Talk 1 \n", - "1006 #zh.wikipedia Macao Main 12 \n", - "1007 #zh.wikipedia Macao Wikipedia 1 \n", - "1008 #zh.wikipedia Malaysia Main 45 \n", - "1009 #zh.wikipedia New Zealand Main 2 \n", - "1010 #zh.wikipedia Portugal Main 2 \n", - "1011 #zh.wikipedia Republic of Korea Main 1 \n", - "1012 #zh.wikipedia Singapore Main 12 \n", - "1013 #zh.wikipedia Taiwan File 3 \n", - "1014 #zh.wikipedia Taiwan Main 659 \n", - "1015 #zh.wikipedia Taiwan Talk 8 \n", - "1016 #zh.wikipedia Taiwan Template 2 \n", - "1017 #zh.wikipedia Taiwan User 1 \n", - "1018 #zh.wikipedia Taiwan User talk 1 \n", - "1019 #zh.wikipedia Taiwan Wikipedia 7 \n", - "1020 #zh.wikipedia United Kingdom Main 2 \n", - "1021 #zh.wikipedia United States Main 39 \n", - "1022 #zh.wikipedia United States Template 1 \n", - "1023 #zh.wikipedia Vietnam Main 5 \n", + " channel namespace __time min max \\\n", + "0 #ar.wikipedia 16 2018-01-15 05:00:00 25.0 25.0 \n", + "1 #ar.wikipedia Main 2018-01-08 00:00:00 1.0 9008.0 \n", + "2 #ar.wikipedia Main 2018-01-08 01:00:00 2.0 2211.0 \n", + "3 #ar.wikipedia Main 2018-01-08 02:00:00 1.0 2260.0 \n", + "4 #ar.wikipedia Main 2018-01-08 03:00:00 1.0 5184.0 \n", + "5 #ar.wikipedia Main 2018-01-08 04:00:00 1.0 1904.0 \n", + "6 #ar.wikipedia Main 2018-01-08 05:00:00 3.0 3720.0 \n", + "7 #ar.wikipedia Main 2018-01-08 06:00:00 1.0 2296.0 \n", + "8 #ar.wikipedia Main 2018-01-08 07:00:00 6.0 2110.0 \n", + "9 #ar.wikipedia Main 2018-01-08 08:00:00 4.0 2244.0 \n", + "10 #ar.wikipedia Main 2018-01-08 09:00:00 1.0 9370.0 \n", + "11 #ar.wikipedia Main 2018-01-08 10:00:00 2.0 14400.0 \n", + "12 #ar.wikipedia Main 2018-01-08 11:00:00 1.0 20861.0 \n", + "13 #ar.wikipedia Main 2018-01-08 12:00:00 1.0 19615.0 \n", + "14 #ar.wikipedia Main 2018-01-08 13:00:00 1.0 20813.0 \n", + "15 #ar.wikipedia Main 2018-01-08 14:00:00 1.0 24895.0 \n", + "16 #ar.wikipedia Main 2018-01-08 15:00:00 1.0 2758.0 \n", + "17 #ar.wikipedia Main 2018-01-08 16:00:00 5.0 5824.0 \n", + "18 #ar.wikipedia Main 2018-01-08 17:00:00 1.0 9981.0 \n", + "19 #ar.wikipedia Main 2018-01-08 18:00:00 1.0 9299.0 \n", + "20 #ar.wikipedia Main 2018-01-08 19:00:00 1.0 12493.0 \n", + "21 #ar.wikipedia Main 2018-01-08 20:00:00 1.0 2868.0 \n", + "22 #ar.wikipedia Main 2018-01-08 21:00:00 1.0 9831.0 \n", + "23 #ar.wikipedia Main 2018-01-08 22:00:00 1.0 4344.0 \n", + "24 #ar.wikipedia Main 2018-01-08 23:00:00 1.0 4500.0 \n", + "25 #ar.wikipedia Main 2018-01-09 00:00:00 1.0 7474.0 \n", + "26 #ar.wikipedia Main 2018-01-09 01:00:00 1.0 2811.0 \n", + "27 #ar.wikipedia Main 2018-01-09 02:00:00 2.0 702.0 \n", + "28 #ar.wikipedia Main 2018-01-09 03:00:00 1.0 1299.0 \n", + "29 #ar.wikipedia Main 2018-01-09 04:00:00 2.0 447.0 \n", + "... ... ... ... ... ... \n", + "119253 #zh.wikipedia 模块 2018-01-24 04:00:00 1.0 1.0 \n", + "119254 #zh.wikipedia 模块 2018-01-24 08:00:00 6.0 6.0 \n", + "119255 #zh.wikipedia 模块 2018-01-24 13:00:00 9.0 6132.0 \n", + "119256 #zh.wikipedia 模块 2018-01-24 15:00:00 28.0 28.0 \n", + "119257 #zh.wikipedia 模块 2018-01-25 00:00:00 15.0 15.0 \n", + "119258 #zh.wikipedia 模块 2018-01-25 11:00:00 15.0 298.0 \n", + "119259 #zh.wikipedia 模块 2018-01-25 12:00:00 2.0 2.0 \n", + "119260 #zh.wikipedia 模块 2018-01-25 13:00:00 4.0 5020.0 \n", + "119261 #zh.wikipedia 模块 2018-01-25 14:00:00 2.0 47.0 \n", + "119262 #zh.wikipedia 模块 2018-01-25 15:00:00 49.0 49.0 \n", + "119263 #zh.wikipedia 模块 2018-01-25 18:00:00 49.0 49.0 \n", + "119264 #zh.wikipedia 模块 2018-01-25 19:00:00 42.0 84.0 \n", + "119265 #zh.wikipedia 模块 2018-01-25 20:00:00 22.0 173.0 \n", + "119266 #zh.wikipedia 模块 2018-01-26 08:00:00 70.0 70.0 \n", + "119267 #zh.wikipedia 模块 2018-01-26 09:00:00 1.0 18.0 \n", + "119268 #zh.wikipedia 模块 2018-01-26 16:00:00 1799.0 1799.0 \n", + "119269 #zh.wikipedia 模块 2018-01-26 17:00:00 28.0 3574.0 \n", + "119270 #zh.wikipedia 模块 2018-01-27 09:00:00 191.0 8720.0 \n", + "119271 #zh.wikipedia 模块 2018-01-27 11:00:00 130.0 191.0 \n", + "119272 #zh.wikipedia 模块 2018-01-27 12:00:00 2548.0 2548.0 \n", + "119273 #zh.wikipedia 模块 2018-01-27 13:00:00 12.0 1146.0 \n", + "119274 #zh.wikipedia 模块 2018-01-27 15:00:00 88.0 88.0 \n", + "119275 #zh.wikipedia 模块讨论 2018-01-12 07:00:00 438.0 438.0 \n", + "119276 #zh.wikipedia 模块讨论 2018-01-18 09:00:00 96.0 96.0 \n", + "119277 #zh.wikipedia 模块讨论 2018-01-22 05:00:00 26497.0 26497.0 \n", + "119278 #zh.wikipedia 模块讨论 2018-01-23 10:00:00 6.0 427.0 \n", + "119279 #zh.wikipedia 模块讨论 2018-01-23 11:00:00 185.0 185.0 \n", + "119280 #zh.wikipedia 模块讨论 2018-01-23 21:00:00 137.0 137.0 \n", + "119281 #zh.wikipedia 爆笑寵妃 2018-01-23 09:00:00 59.0 59.0 \n", + "119282 #zh.wikipedia 阿富汗 2018-01-09 23:00:00 28.0 28.0 \n", "\n", - " outliers1 outliers5 outliers10 \n", - "0 0.0 0.0 2.0 \n", - "1 0.0 0.0 0.0 \n", - "2 0.0 0.0 0.0 \n", - "3 0.0 0.0 0.0 \n", - "4 0.0 0.0 0.0 \n", - "5 0.0 0.0 1.0 \n", - "6 1.0 4.0 8.0 \n", - "7 0.0 0.0 0.0 \n", - "8 0.0 0.0 0.0 \n", - "9 0.0 0.0 0.0 \n", - "10 0.0 0.0 4.0 \n", - "11 0.0 0.0 1.0 \n", - "12 0.0 0.0 0.0 \n", - "13 0.0 0.0 0.0 \n", - "14 0.0 0.0 1.0 \n", - "15 0.0 0.0 0.0 \n", - "16 0.0 2.0 2.0 \n", - "17 0.0 2.0 4.0 \n", - "18 0.0 0.0 0.0 \n", - "19 0.0 0.0 0.0 \n", - "20 0.0 0.0 1.0 \n", - "21 0.0 0.0 0.0 \n", - "22 1.0 1.0 1.0 \n", - "23 0.0 0.0 0.0 \n", - "24 0.0 0.0 0.0 \n", - "25 0.0 0.0 0.0 \n", - "26 0.0 3.0 8.0 \n", - "27 0.0 0.0 0.0 \n", - "28 0.0 0.0 0.0 \n", - "29 0.0 0.0 0.0 \n", - "... ... ... ... \n", - "994 0.0 0.0 1.0 \n", - "995 0.0 0.0 2.0 \n", - "996 0.0 0.0 0.0 \n", - "997 0.0 0.0 0.0 \n", - "998 0.0 0.0 0.0 \n", - "999 0.0 0.0 0.0 \n", - "1000 2.0 14.0 22.0 \n", - "1001 0.0 0.0 0.0 \n", - "1002 0.0 0.0 1.0 \n", - "1003 0.0 0.0 0.0 \n", - "1004 1.0 2.0 3.0 \n", - "1005 0.0 0.0 0.0 \n", - "1006 0.0 0.0 1.0 \n", - "1007 0.0 0.0 0.0 \n", - "1008 0.0 0.0 1.0 \n", - "1009 0.0 0.0 0.0 \n", - "1010 0.0 0.0 0.0 \n", - "1011 0.0 0.0 0.0 \n", - "1012 0.0 0.0 0.0 \n", - "1013 0.0 0.0 0.0 \n", - "1014 0.0 6.0 39.0 \n", - "1015 0.0 1.0 1.0 \n", - "1016 0.0 0.0 0.0 \n", - "1017 0.0 0.0 0.0 \n", - "1018 0.0 0.0 1.0 \n", - "1019 0.0 2.0 6.0 \n", - "1020 0.0 0.0 0.0 \n", - "1021 0.0 1.0 5.0 \n", - "1022 0.0 0.0 0.0 \n", - "1023 0.0 0.0 0.0 \n", + " lmin lmax m0 m1 m2 m3 \\\n", + "0 3.218876 3.218876 1.0 25.0 625.0 1.562500e+04 \n", + "1 0.000000 9.105868 149.0 21663.0 83984409.0 7.333297e+11 \n", + "2 0.693147 7.701200 185.0 22428.0 17164924.0 2.951661e+10 \n", + "3 0.000000 7.723120 105.0 11471.0 10752073.0 2.150527e+10 \n", + "4 0.000000 8.553332 64.0 7686.0 27016542.0 1.393251e+11 \n", + "5 0.000000 7.551712 79.0 13132.0 10292000.0 1.520400e+10 \n", + "6 1.098612 8.221479 63.0 22247.0 38118603.0 1.136075e+11 \n", + "7 0.000000 7.738924 122.0 25771.0 19301921.0 2.632441e+10 \n", + "8 1.791759 7.654443 185.0 35555.0 21997115.0 2.728389e+10 \n", + "9 1.386294 7.716015 166.0 32412.0 19844876.0 2.461746e+10 \n", + "10 0.000000 9.145268 118.0 48319.0 213810381.0 1.489308e+12 \n", + "11 0.693147 9.574983 109.0 38819.0 234665775.0 3.054160e+12 \n", + "12 0.000000 9.945637 148.0 59901.0 545245471.0 9.823238e+12 \n", + "13 0.000000 9.884050 205.0 69733.0 453862027.0 7.791436e+12 \n", + "14 0.000000 9.943333 216.0 91332.0 764243202.0 1.165487e+13 \n", + "15 0.000000 10.122422 207.0 98639.0 828905321.0 1.662305e+13 \n", + "16 0.000000 7.922261 165.0 11762.0 11136476.0 2.558908e+10 \n", + "17 1.609438 8.669743 126.0 29981.0 88989143.0 4.395026e+11 \n", + "18 0.000000 9.208439 187.0 36781.0 134903627.0 1.066277e+12 \n", + "19 0.000000 9.137662 319.0 56324.0 180817952.0 1.160272e+12 \n", + "20 0.000000 9.432924 169.0 36030.0 189039444.0 2.029805e+12 \n", + "21 0.000000 7.961370 166.0 27412.0 30194820.0 5.825853e+10 \n", + "22 0.000000 9.193296 166.0 81943.0 351898425.0 2.292421e+12 \n", + "23 0.000000 8.376551 116.0 41030.0 94661544.0 3.027321e+11 \n", + "24 0.000000 8.411833 133.0 34747.0 66006241.0 2.072867e+11 \n", + "25 0.000000 8.919186 76.0 35724.0 116030450.0 6.091646e+11 \n", + "26 0.000000 7.941296 75.0 11939.0 18132095.0 4.276567e+10 \n", + "27 0.693147 6.553933 42.0 3056.0 964256.0 5.605717e+08 \n", + "28 0.000000 7.169350 38.0 3930.0 1942072.0 2.222922e+09 \n", + "29 0.693147 6.102559 86.0 5300.0 532602.0 1.162811e+08 \n", + "... ... ... ... ... ... ... \n", + "119253 0.000000 0.000000 1.0 1.0 1.0 1.000000e+00 \n", + "119254 1.791759 1.791759 1.0 6.0 36.0 2.160000e+02 \n", + "119255 2.197225 8.721276 2.0 6141.0 37601505.0 2.305719e+11 \n", + "119256 3.332205 3.332205 1.0 28.0 784.0 2.195200e+04 \n", + "119257 2.708050 2.708050 1.0 15.0 225.0 3.375000e+03 \n", + "119258 2.708050 5.697093 3.0 431.0 102953.0 2.811000e+07 \n", + "119259 0.693147 0.693147 1.0 2.0 4.0 8.000000e+00 \n", + "119260 1.386294 8.521185 2.0 5024.0 25200416.0 1.265060e+11 \n", + "119261 0.693147 3.850148 3.0 53.0 2229.0 1.038950e+05 \n", + "119262 3.891820 3.891820 1.0 49.0 2401.0 1.176490e+05 \n", + "119263 3.891820 3.891820 1.0 49.0 2401.0 1.176490e+05 \n", + "119264 3.737670 4.430817 2.0 126.0 8820.0 6.667920e+05 \n", + "119265 3.091042 5.153292 2.0 195.0 30413.0 5.188365e+06 \n", + "119266 4.248495 4.248495 1.0 70.0 4900.0 3.430000e+05 \n", + "119267 0.000000 2.890372 2.0 19.0 325.0 5.833000e+03 \n", + "119268 7.494986 7.494986 1.0 1799.0 3236401.0 5.822285e+09 \n", + "119269 3.332205 8.181441 3.0 7016.0 24429656.0 8.544395e+10 \n", + "119270 5.252273 9.073375 2.0 8911.0 76074881.0 6.630618e+11 \n", + "119271 4.867534 5.252273 2.0 321.0 53381.0 9.164871e+06 \n", + "119272 7.843064 7.843064 1.0 2548.0 6492304.0 1.654239e+10 \n", + "119273 2.484907 7.044033 4.0 1244.0 1317608.0 1.505279e+09 \n", + "119274 4.477337 4.477337 1.0 88.0 7744.0 6.814720e+05 \n", + "119275 6.082219 6.082219 1.0 438.0 191844.0 8.402767e+07 \n", + "119276 4.564348 4.564348 1.0 96.0 9216.0 8.847360e+05 \n", + "119277 10.184787 10.184787 1.0 26497.0 702091009.0 1.860331e+13 \n", + "119278 1.791759 6.056784 3.0 632.0 221966.0 8.573530e+07 \n", + "119279 5.220356 5.220356 1.0 185.0 34225.0 6.331625e+06 \n", + "119280 4.919981 4.919981 1.0 137.0 18769.0 2.571353e+06 \n", + "119281 4.077537 4.077537 1.0 59.0 3481.0 2.053790e+05 \n", + "119282 3.332205 3.332205 1.0 28.0 784.0 2.195200e+04 \n", "\n", - "[1024 rows x 7 columns]" + " m4 m5 lm0 lm1 lm2 \\\n", + "0 3.906250e+05 9.765625e+06 1.0 3.218876 10.361162 \n", + "1 6.587312e+15 5.931574e+19 149.0 623.865921 2750.866322 \n", + "2 5.669284e+13 1.124865e+17 185.0 778.450524 3439.849081 \n", + "3 4.631962e+13 1.011019e+17 105.0 409.865354 1737.233189 \n", + "4 7.222052e+14 3.743906e+18 64.0 222.911934 854.601760 \n", + "5 2.606315e+13 4.699229e+16 79.0 339.670259 1597.102698 \n", + "6 3.783324e+14 1.295618e+18 63.0 306.030236 1616.485245 \n", + "7 4.700515e+13 9.415283e+16 122.0 533.746768 2632.325235 \n", + "8 4.519400e+13 8.302905e+16 185.0 847.318981 4131.665057 \n", + "9 4.288884e+13 8.480738e+16 166.0 755.340678 3693.919162 \n", + "10 1.171073e+16 9.747945e+19 118.0 505.935799 2471.909237 \n", + "11 4.322428e+16 6.199923e+20 109.0 485.395483 2407.659640 \n", + "12 1.954983e+17 4.003327e+21 148.0 633.654016 3107.065691 \n", + "13 1.492376e+17 2.910235e+21 205.0 917.447350 4610.700216 \n", + "14 2.099017e+17 4.096753e+21 216.0 957.165239 4692.063959 \n", + "15 3.926093e+17 9.629692e+21 207.0 967.286478 4983.871534 \n", + "16 6.514402e+13 1.714289e+17 165.0 561.789354 2125.120317 \n", + "17 2.415290e+15 1.372286e+19 126.0 507.100169 2243.439188 \n", + "18 1.007978e+16 9.939758e+19 187.0 710.183006 3039.636619 \n", + "19 9.029371e+15 7.672157e+19 319.0 1189.529105 5003.134721 \n", + "20 2.457818e+16 3.049638e+20 169.0 641.152635 2752.672525 \n", + "21 1.326486e+14 3.273461e+17 166.0 669.355818 2995.182800 \n", + "22 1.814749e+16 1.579637e+20 166.0 693.088089 3395.679190 \n", + "23 1.094226e+15 4.217602e+18 116.0 471.483026 2320.786841 \n", + "24 7.757698e+14 3.141152e+18 133.0 551.993346 2618.555596 \n", + "25 3.818369e+15 2.606292e+19 76.0 335.035936 1753.130422 \n", + "26 1.072078e+14 2.762855e+17 75.0 289.623485 1250.400635 \n", + "27 3.626704e+11 2.399057e+14 42.0 146.596532 582.824468 \n", + "28 2.851957e+12 3.699472e+15 38.0 151.273501 665.242663 \n", + "29 4.249718e+10 1.812852e+13 86.0 337.751266 1368.923662 \n", + "... ... ... ... ... ... \n", + "119253 1.000000e+00 1.000000e+00 1.0 0.000000 0.000000 \n", + "119254 1.296000e+03 7.776000e+03 1.0 1.791759 3.210402 \n", + "119255 1.413867e+15 8.669833e+18 2.0 10.918501 80.888455 \n", + "119256 6.146560e+05 1.721037e+07 1.0 3.332205 11.103587 \n", + "119257 5.062500e+04 7.593750e+05 1.0 2.708050 7.333536 \n", + "119258 8.080079e+09 2.372951e+12 3.0 13.175828 62.549842 \n", + "119259 1.600000e+01 3.200000e+01 1.0 0.693147 0.480453 \n", + "119260 6.350602e+14 3.188002e+18 2.0 9.907480 74.532409 \n", + "119261 4.879953e+06 2.293461e+08 3.0 5.929589 17.225902 \n", + "119262 5.764801e+06 2.824752e+08 1.0 3.891820 15.146265 \n", + "119263 5.764801e+06 2.824752e+08 1.0 3.891820 15.146265 \n", + "119264 5.289883e+07 4.312811e+09 2.0 8.168486 33.602312 \n", + "119265 8.959793e+08 1.549690e+11 2.0 8.244334 36.110958 \n", + "119266 2.401000e+07 1.680700e+09 1.0 4.248495 18.049712 \n", + "119267 1.049770e+05 1.889569e+06 2.0 2.890372 8.354249 \n", + "119268 1.047429e+13 1.884325e+16 1.0 7.494986 56.174819 \n", + "119269 2.990099e+14 1.046926e+18 3.0 19.649285 144.228195 \n", + "119270 5.781840e+15 5.041763e+19 2.0 14.325648 109.912501 \n", + "119271 1.616473e+09 2.913242e+11 2.0 10.119808 51.279268 \n", + "119272 4.215001e+13 1.073982e+17 1.0 7.843064 61.513653 \n", + "119273 1.724811e+12 1.976620e+15 4.0 16.921587 83.383945 \n", + "119274 5.996954e+07 5.277319e+09 1.0 4.477337 20.046545 \n", + "119275 3.680412e+10 1.612020e+13 1.0 6.082219 36.993387 \n", + "119276 8.493466e+07 8.153727e+09 1.0 4.564348 20.833274 \n", + "119277 4.929318e+17 1.306121e+22 1.0 10.184787 103.729882 \n", + "119278 3.481210e+10 1.450721e+13 3.0 13.141848 67.914111 \n", + "119279 1.171351e+09 2.166999e+11 1.0 5.220356 27.252115 \n", + "119280 3.522754e+08 4.826172e+10 1.0 4.919981 24.206212 \n", + "119281 1.211736e+07 7.149243e+08 1.0 4.077537 16.626312 \n", + "119282 6.146560e+05 1.721037e+07 1.0 3.332205 11.103587 \n", + "\n", + " lm3 lm4 lm5 \n", + "0 33.351293 107.353669 3.455581e+02 \n", + "1 12573.995231 60228.024755 3.084326e+05 \n", + "2 15837.660630 76250.085528 3.871860e+05 \n", + "3 7792.116092 37124.511576 1.893863e+05 \n", + "4 3555.379528 16586.848939 8.970690e+04 \n", + "5 7916.110671 41284.402101 2.266564e+05 \n", + "6 9106.009984 54137.728987 3.373556e+05 \n", + "7 13890.844091 77252.101951 4.486258e+05 \n", + "8 21252.017440 114463.683242 6.416142e+05 \n", + "9 19119.675276 103717.481404 5.852788e+05 \n", + "10 13561.437222 82846.690914 5.542719e+05 \n", + "11 13061.331524 76704.563149 4.838209e+05 \n", + "12 16850.137822 100333.756458 6.513246e+05 \n", + "13 25085.612638 146173.142790 9.073211e+05 \n", + "14 25194.242050 148637.342307 9.627241e+05 \n", + "15 27972.605325 170348.071088 1.118231e+06 \n", + "16 8656.205080 38185.491863 1.846790e+05 \n", + "17 11084.672780 61604.603177 3.816690e+05 \n", + "18 14510.236033 77742.351888 4.661678e+05 \n", + "19 23442.962217 123315.694340 7.269031e+05 \n", + "20 13162.823765 70715.579455 4.268993e+05 \n", + "21 14509.811177 76283.256699 4.338066e+05 \n", + "22 19308.399786 124886.042468 8.875766e+05 \n", + "23 12923.651010 79754.002744 5.325321e+05 \n", + "24 13703.861938 78719.835498 4.903965e+05 \n", + "25 10269.091366 65447.315751 4.447135e+05 \n", + "26 5902.212860 30618.199278 1.747318e+05 \n", + "27 2498.180334 11387.289389 5.507752e+04 \n", + "28 3040.736624 14415.383172 7.132009e+04 \n", + "29 5647.867090 23665.550578 1.008164e+05 \n", + "... ... ... ... \n", + "119253 0.000000 0.000000 0.000000e+00 \n", + "119254 5.752268 10.306681 1.846709e+01 \n", + "119255 673.953772 5808.531499 5.050575e+04 \n", + "119256 36.999422 123.289642 4.108263e+02 \n", + "119257 19.859583 53.780749 1.456410e+02 \n", + "119258 313.347501 1625.221166 8.618412e+03 \n", + "119259 0.333025 0.230835 1.600027e-01 \n", + "119260 621.392546 5275.992221 4.493136e+04 \n", + "119261 60.070411 223.664397 8.513123e+02 \n", + "119262 58.946542 229.409351 8.928200e+02 \n", + "119263 58.946542 229.409351 8.928200e+02 \n", + "119264 139.202300 580.586590 2.437194e+03 \n", + "119265 166.386446 796.532439 3.916503e+03 \n", + "119266 76.684115 325.792097 1.384126e+03 \n", + "119267 24.146885 69.793475 2.017291e+02 \n", + "119268 421.029492 3155.610250 2.365126e+04 \n", + "119269 1123.119020 8984.649590 7.270887e+04 \n", + "119270 891.866956 7538.599028 6.549264e+04 \n", + "119271 260.217157 1322.361263 6.729429e+03 \n", + "119272 482.455520 3783.929526 2.967760e+04 \n", + "119273 468.802091 2895.230560 1.895162e+04 \n", + "119274 89.755134 401.863964 1.799280e+03 \n", + "119275 225.001877 1368.510672 8.323581e+03 \n", + "119276 95.090318 434.025323 1.981043e+03 \n", + "119277 1056.466734 10759.888445 1.095872e+05 \n", + "119278 376.256674 2141.137567 1.232507e+04 \n", + "119279 142.265737 742.677769 3.877042e+03 \n", + "119280 119.094103 585.940714 2.882817e+03 \n", + "119281 67.794408 276.434238 1.127171e+03 \n", + "119282 36.999422 123.289642 4.108263e+02 \n", + "\n", + "[119283 rows x 19 columns]" ] }, - "execution_count": 153, + "execution_count": 219, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "t1 = wiki_data[metric].quantile(0.99)\n", - "t5 = wiki_data[metric].quantile(0.95)\n", - "t10 = wiki_data[metric].quantile(0.90)\n", - "wiki_oracle = wiki_data.groupby(attributes).agg({metric: [\n", - " 'count',\n", - " outliers(t1, \"1\"),\n", - " outliers(t5, \"5\"),\n", - " outliers(t10, \"10\")\n", + "big_wiki_cube = big_wiki_data.groupby(attributes + [pd.TimeGrouper(freq='H')]).agg({metric: [\n", + " 'min',\n", + " 'max',\n", + " log_min(),\n", + " log_max(),\n", + " moment(0),\n", + " moment(1),\n", + " moment(2),\n", + " moment(3),\n", + " moment(4),\n", + " moment(5),\n", + " log_moment(0),\n", + " log_moment(1),\n", + " log_moment(2),\n", + " log_moment(3),\n", + " log_moment(4),\n", + " log_moment(5)\n", "]}).reset_index(col_level=1)\n", - "wiki_oracle.columns = wiki_oracle.columns.get_level_values(1)\n", - "wiki_oracle" + "big_wiki_cube.columns = big_wiki_cube.columns.get_level_values(1)\n", + "big_wiki_cube" ] }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 220, "metadata": { "collapsed": true }, "outputs": [], "source": [ - "wiki_oracle.to_csv('lib/src/test/resources/wiki_oracle_cubed.csv')" + "big_wiki_cube.to_csv('lib/src/test/resources/big_wiki_moments_cubed.csv')" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/lib/cp.txt b/lib/cp.txt deleted file mode 100644 index 572ce2fa9..000000000 --- a/lib/cp.txt +++ /dev/null @@ -1 +0,0 @@ -/Users/Jialin/.m2/repository/junit/junit/4.12/junit-4.12.jar:/Users/Jialin/.m2/repository/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar:/Users/Jialin/.m2/repository/log4j/log4j/1.2.17/log4j-1.2.17.jar:/Users/Jialin/.m2/repository/org/slf4j/slf4j-log4j12/1.8.0-beta0/slf4j-log4j12-1.8.0-beta0.jar:/Users/Jialin/.m2/repository/org/slf4j/slf4j-api/1.8.0-beta0/slf4j-api-1.8.0-beta0.jar:/Users/Jialin/.m2/repository/com/google/guava/guava/21.0/guava-21.0.jar:/Users/Jialin/.m2/repository/org/apache/commons/commons-math3/3.6/commons-math3-3.6.jar:/Users/Jialin/.m2/repository/com/univocity/univocity-parsers/2.5.9/univocity-parsers-2.5.9.jar:/Users/Jialin/.m2/repository/com/fasterxml/jackson/core/jackson-databind/2.8.9/jackson-databind-2.8.9.jar:/Users/Jialin/.m2/repository/com/fasterxml/jackson/core/jackson-annotations/2.8.0/jackson-annotations-2.8.0.jar:/Users/Jialin/.m2/repository/com/fasterxml/jackson/core/jackson-core/2.8.9/jackson-core-2.8.9.jar:/Users/Jialin/.m2/repository/futuredata/java-msketch/1.0-SNAPSHOT/java-msketch-1.0-SNAPSHOT.jar:/Users/Jialin/.m2/repository/org/apache/commons/commons-csv/1.5/commons-csv-1.5.jar:/Users/Jialin/.m2/repository/com/tdunning/t-digest/3.2/t-digest-3.2.jar:/Users/Jialin/.m2/repository/com/yahoo/datasketches/sketches-core/0.10.3/sketches-core-0.10.3.jar:/Users/Jialin/.m2/repository/com/yahoo/datasketches/memory/0.10.3/memory-0.10.3.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-catalyst_2.11/2.2.1/spark-catalyst_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/scala-lang/scala-reflect/2.11.8/scala-reflect-2.11.8.jar:/Users/Jialin/.m2/repository/org/scala-lang/scala-library/2.11.8/scala-library-2.11.8.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-core_2.11/2.2.1/spark-core_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/apache/avro/avro/1.7.7/avro-1.7.7.jar:/Users/Jialin/.m2/repository/org/codehaus/jackson/jackson-core-asl/1.9.13/jackson-core-asl-1.9.13.jar:/Users/Jialin/.m2/repository/org/codehaus/jackson/jackson-mapper-asl/1.9.13/jackson-mapper-asl-1.9.13.jar:/Users/Jialin/.m2/repository/com/thoughtworks/paranamer/paranamer/2.3/paranamer-2.3.jar:/Users/Jialin/.m2/repository/org/apache/commons/commons-compress/1.4.1/commons-compress-1.4.1.jar:/Users/Jialin/.m2/repository/org/tukaani/xz/1.0/xz-1.0.jar:/Users/Jialin/.m2/repository/org/apache/avro/avro-mapred/1.7.7/avro-mapred-1.7.7-hadoop2.jar:/Users/Jialin/.m2/repository/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7.jar:/Users/Jialin/.m2/repository/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7-tests.jar:/Users/Jialin/.m2/repository/com/twitter/chill_2.11/0.8.0/chill_2.11-0.8.0.jar:/Users/Jialin/.m2/repository/com/esotericsoftware/kryo-shaded/3.0.3/kryo-shaded-3.0.3.jar:/Users/Jialin/.m2/repository/com/esotericsoftware/minlog/1.3.0/minlog-1.3.0.jar:/Users/Jialin/.m2/repository/org/objenesis/objenesis/2.1/objenesis-2.1.jar:/Users/Jialin/.m2/repository/com/twitter/chill-java/0.8.0/chill-java-0.8.0.jar:/Users/Jialin/.m2/repository/org/apache/xbean/xbean-asm5-shaded/4.4/xbean-asm5-shaded-4.4.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-client/2.6.5/hadoop-client-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-common/2.6.5/hadoop-common-2.6.5.jar:/Users/Jialin/.m2/repository/commons-cli/commons-cli/1.2/commons-cli-1.2.jar:/Users/Jialin/.m2/repository/xmlenc/xmlenc/0.52/xmlenc-0.52.jar:/Users/Jialin/.m2/repository/commons-httpclient/commons-httpclient/3.1/commons-httpclient-3.1.jar:/Users/Jialin/.m2/repository/commons-io/commons-io/2.4/commons-io-2.4.jar:/Users/Jialin/.m2/repository/commons-collections/commons-collections/3.2.2/commons-collections-3.2.2.jar:/Users/Jialin/.m2/repository/commons-lang/commons-lang/2.6/commons-lang-2.6.jar:/Users/Jialin/.m2/repository/commons-configuration/commons-configuration/1.6/commons-configuration-1.6.jar:/Users/Jialin/.m2/repository/commons-digester/commons-digester/1.8/commons-digester-1.8.jar:/Users/Jialin/.m2/repository/commons-beanutils/commons-beanutils/1.7.0/commons-beanutils-1.7.0.jar:/Users/Jialin/.m2/repository/commons-beanutils/commons-beanutils-core/1.8.0/commons-beanutils-core-1.8.0.jar:/Users/Jialin/.m2/repository/com/google/protobuf/protobuf-java/2.5.0/protobuf-java-2.5.0.jar:/Users/Jialin/.m2/repository/com/google/code/gson/gson/2.2.4/gson-2.2.4.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-auth/2.6.5/hadoop-auth-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/directory/server/apacheds-kerberos-codec/2.0.0-M15/apacheds-kerberos-codec-2.0.0-M15.jar:/Users/Jialin/.m2/repository/org/apache/directory/server/apacheds-i18n/2.0.0-M15/apacheds-i18n-2.0.0-M15.jar:/Users/Jialin/.m2/repository/org/apache/directory/api/api-asn1-api/1.0.0-M20/api-asn1-api-1.0.0-M20.jar:/Users/Jialin/.m2/repository/org/apache/directory/api/api-util/1.0.0-M20/api-util-1.0.0-M20.jar:/Users/Jialin/.m2/repository/org/apache/curator/curator-client/2.6.0/curator-client-2.6.0.jar:/Users/Jialin/.m2/repository/org/htrace/htrace-core/3.0.4/htrace-core-3.0.4.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-hdfs/2.6.5/hadoop-hdfs-2.6.5.jar:/Users/Jialin/.m2/repository/org/mortbay/jetty/jetty-util/6.1.26/jetty-util-6.1.26.jar:/Users/Jialin/.m2/repository/xerces/xercesImpl/2.9.1/xercesImpl-2.9.1.jar:/Users/Jialin/.m2/repository/xml-apis/xml-apis/1.3.04/xml-apis-1.3.04.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-app/2.6.5/hadoop-mapreduce-client-app-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-common/2.6.5/hadoop-mapreduce-client-common-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-yarn-client/2.6.5/hadoop-yarn-client-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-yarn-server-common/2.6.5/hadoop-yarn-server-common-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-shuffle/2.6.5/hadoop-mapreduce-client-shuffle-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-yarn-api/2.6.5/hadoop-yarn-api-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-core/2.6.5/hadoop-mapreduce-client-core-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-yarn-common/2.6.5/hadoop-yarn-common-2.6.5.jar:/Users/Jialin/.m2/repository/javax/xml/bind/jaxb-api/2.2.2/jaxb-api-2.2.2.jar:/Users/Jialin/.m2/repository/javax/xml/stream/stax-api/1.0-2/stax-api-1.0-2.jar:/Users/Jialin/.m2/repository/org/codehaus/jackson/jackson-jaxrs/1.9.13/jackson-jaxrs-1.9.13.jar:/Users/Jialin/.m2/repository/org/codehaus/jackson/jackson-xc/1.9.13/jackson-xc-1.9.13.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-jobclient/2.6.5/hadoop-mapreduce-client-jobclient-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/hadoop/hadoop-annotations/2.6.5/hadoop-annotations-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-launcher_2.11/2.2.1/spark-launcher_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-network-common_2.11/2.2.1/spark-network-common_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/fusesource/leveldbjni/leveldbjni-all/1.8/leveldbjni-all-1.8.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-network-shuffle_2.11/2.2.1/spark-network-shuffle_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/net/java/dev/jets3t/jets3t/0.9.3/jets3t-0.9.3.jar:/Users/Jialin/.m2/repository/org/apache/httpcomponents/httpcore/4.3.3/httpcore-4.3.3.jar:/Users/Jialin/.m2/repository/org/apache/httpcomponents/httpclient/4.3.6/httpclient-4.3.6.jar:/Users/Jialin/.m2/repository/javax/activation/activation/1.1.1/activation-1.1.1.jar:/Users/Jialin/.m2/repository/mx4j/mx4j/3.0.2/mx4j-3.0.2.jar:/Users/Jialin/.m2/repository/javax/mail/mail/1.4.7/mail-1.4.7.jar:/Users/Jialin/.m2/repository/org/bouncycastle/bcprov-jdk15on/1.51/bcprov-jdk15on-1.51.jar:/Users/Jialin/.m2/repository/com/jamesmurty/utils/java-xmlbuilder/1.0/java-xmlbuilder-1.0.jar:/Users/Jialin/.m2/repository/net/iharder/base64/2.3.8/base64-2.3.8.jar:/Users/Jialin/.m2/repository/org/apache/curator/curator-recipes/2.6.0/curator-recipes-2.6.0.jar:/Users/Jialin/.m2/repository/org/apache/curator/curator-framework/2.6.0/curator-framework-2.6.0.jar:/Users/Jialin/.m2/repository/org/apache/zookeeper/zookeeper/3.4.6/zookeeper-3.4.6.jar:/Users/Jialin/.m2/repository/javax/servlet/javax.servlet-api/3.1.0/javax.servlet-api-3.1.0.jar:/Users/Jialin/.m2/repository/org/apache/commons/commons-lang3/3.5/commons-lang3-3.5.jar:/Users/Jialin/.m2/repository/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9.jar:/Users/Jialin/.m2/repository/org/slf4j/jul-to-slf4j/1.7.16/jul-to-slf4j-1.7.16.jar:/Users/Jialin/.m2/repository/org/slf4j/jcl-over-slf4j/1.7.16/jcl-over-slf4j-1.7.16.jar:/Users/Jialin/.m2/repository/com/ning/compress-lzf/1.0.3/compress-lzf-1.0.3.jar:/Users/Jialin/.m2/repository/org/xerial/snappy/snappy-java/1.1.2.6/snappy-java-1.1.2.6.jar:/Users/Jialin/.m2/repository/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar:/Users/Jialin/.m2/repository/org/roaringbitmap/RoaringBitmap/0.5.11/RoaringBitmap-0.5.11.jar:/Users/Jialin/.m2/repository/commons-net/commons-net/2.2/commons-net-2.2.jar:/Users/Jialin/.m2/repository/org/json4s/json4s-jackson_2.11/3.2.11/json4s-jackson_2.11-3.2.11.jar:/Users/Jialin/.m2/repository/org/json4s/json4s-core_2.11/3.2.11/json4s-core_2.11-3.2.11.jar:/Users/Jialin/.m2/repository/org/json4s/json4s-ast_2.11/3.2.11/json4s-ast_2.11-3.2.11.jar:/Users/Jialin/.m2/repository/org/scala-lang/scalap/2.11.0/scalap-2.11.0.jar:/Users/Jialin/.m2/repository/org/scala-lang/scala-compiler/2.11.0/scala-compiler-2.11.0.jar:/Users/Jialin/.m2/repository/org/scala-lang/modules/scala-xml_2.11/1.0.1/scala-xml_2.11-1.0.1.jar:/Users/Jialin/.m2/repository/org/scala-lang/modules/scala-parser-combinators_2.11/1.0.1/scala-parser-combinators_2.11-1.0.1.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/core/jersey-client/2.22.2/jersey-client-2.22.2.jar:/Users/Jialin/.m2/repository/javax/ws/rs/javax.ws.rs-api/2.0.1/javax.ws.rs-api-2.0.1.jar:/Users/Jialin/.m2/repository/org/glassfish/hk2/hk2-api/2.4.0-b34/hk2-api-2.4.0-b34.jar:/Users/Jialin/.m2/repository/org/glassfish/hk2/hk2-utils/2.4.0-b34/hk2-utils-2.4.0-b34.jar:/Users/Jialin/.m2/repository/org/glassfish/hk2/external/aopalliance-repackaged/2.4.0-b34/aopalliance-repackaged-2.4.0-b34.jar:/Users/Jialin/.m2/repository/org/glassfish/hk2/external/javax.inject/2.4.0-b34/javax.inject-2.4.0-b34.jar:/Users/Jialin/.m2/repository/org/glassfish/hk2/hk2-locator/2.4.0-b34/hk2-locator-2.4.0-b34.jar:/Users/Jialin/.m2/repository/org/javassist/javassist/3.18.1-GA/javassist-3.18.1-GA.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/core/jersey-common/2.22.2/jersey-common-2.22.2.jar:/Users/Jialin/.m2/repository/javax/annotation/javax.annotation-api/1.2/javax.annotation-api-1.2.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/bundles/repackaged/jersey-guava/2.22.2/jersey-guava-2.22.2.jar:/Users/Jialin/.m2/repository/org/glassfish/hk2/osgi-resource-locator/1.0.1/osgi-resource-locator-1.0.1.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/core/jersey-server/2.22.2/jersey-server-2.22.2.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/media/jersey-media-jaxb/2.22.2/jersey-media-jaxb-2.22.2.jar:/Users/Jialin/.m2/repository/javax/validation/validation-api/1.1.0.Final/validation-api-1.1.0.Final.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/containers/jersey-container-servlet/2.22.2/jersey-container-servlet-2.22.2.jar:/Users/Jialin/.m2/repository/org/glassfish/jersey/containers/jersey-container-servlet-core/2.22.2/jersey-container-servlet-core-2.22.2.jar:/Users/Jialin/.m2/repository/io/netty/netty-all/4.0.43.Final/netty-all-4.0.43.Final.jar:/Users/Jialin/.m2/repository/io/netty/netty/3.9.9.Final/netty-3.9.9.Final.jar:/Users/Jialin/.m2/repository/com/clearspring/analytics/stream/2.7.0/stream-2.7.0.jar:/Users/Jialin/.m2/repository/io/dropwizard/metrics/metrics-core/3.1.2/metrics-core-3.1.2.jar:/Users/Jialin/.m2/repository/io/dropwizard/metrics/metrics-jvm/3.1.2/metrics-jvm-3.1.2.jar:/Users/Jialin/.m2/repository/io/dropwizard/metrics/metrics-json/3.1.2/metrics-json-3.1.2.jar:/Users/Jialin/.m2/repository/io/dropwizard/metrics/metrics-graphite/3.1.2/metrics-graphite-3.1.2.jar:/Users/Jialin/.m2/repository/com/fasterxml/jackson/module/jackson-module-scala_2.11/2.6.5/jackson-module-scala_2.11-2.6.5.jar:/Users/Jialin/.m2/repository/com/fasterxml/jackson/module/jackson-module-paranamer/2.6.5/jackson-module-paranamer-2.6.5.jar:/Users/Jialin/.m2/repository/org/apache/ivy/ivy/2.4.0/ivy-2.4.0.jar:/Users/Jialin/.m2/repository/oro/oro/2.0.8/oro-2.0.8.jar:/Users/Jialin/.m2/repository/net/razorvine/pyrolite/4.13/pyrolite-4.13.jar:/Users/Jialin/.m2/repository/net/sf/py4j/py4j/0.10.4/py4j-0.10.4.jar:/Users/Jialin/.m2/repository/org/apache/commons/commons-crypto/1.0.0/commons-crypto-1.0.0.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-tags_2.11/2.2.1/spark-tags_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-unsafe_2.11/2.2.1/spark-unsafe_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/apache/spark/spark-sketch_2.11/2.2.1/spark-sketch_2.11-2.2.1.jar:/Users/Jialin/.m2/repository/org/codehaus/janino/janino/3.0.0/janino-3.0.0.jar:/Users/Jialin/.m2/repository/org/codehaus/janino/commons-compiler/3.0.0/commons-compiler-3.0.0.jar:/Users/Jialin/.m2/repository/org/antlr/antlr4-runtime/4.5.3/antlr4-runtime-4.5.3.jar:/Users/Jialin/.m2/repository/commons-codec/commons-codec/1.10/commons-codec-1.10.jar:/Users/Jialin/.m2/repository/org/spark-project/spark/unused/1.0.0/unused-1.0.0.jar \ No newline at end of file diff --git a/lib/genCP.sh b/lib/genCP.sh deleted file mode 100644 index 25afe7f0d..000000000 --- a/lib/genCP.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -mvn dependency:build-classpath -Dmdep.outputFile=cp.txt \ No newline at end of file diff --git a/lib/momentBench.sh b/lib/momentBench.sh deleted file mode 100644 index d49b022d0..000000000 --- a/lib/momentBench.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -java -Xmx8g -Xms8g -cp target/macrobase-lib-0.2.1-SNAPSHOT.jar:$(cat cp.txt) edu.stanford.futuredata.macrobase.APLMomentSummarizerBench $@ \ No newline at end of file diff --git a/lib/momentBenchMilan.json b/lib/momentBenchMilan.json deleted file mode 100644 index 1d941ff1f..000000000 --- a/lib/momentBenchMilan.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "minSupport": 0.006, - "percentile": 1.0, - "outlierColumn": "outliers1", - "numWarmupTrials": 10, - "numTrials": 10, - "oracleCubeFilename": "src/test/resources/milan_oracle_cubed.csv", - "momentCubeFilename": "src/test/resources/milan_moments_cubed.csv", - "attributes": [ - "Grid", - "Country" - ], - "doContainment": true, - "verbose": true -} \ No newline at end of file diff --git a/lib/momentBenchWiki.json b/lib/momentBenchWiki.json deleted file mode 100644 index 326cc09c2..000000000 --- a/lib/momentBenchWiki.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "minSupport": 0.1, - "percentile": 1.0, - "outlierColumn": "outliers1", - "numWarmupTrials": 10, - "numTrials": 10, - "oracleCubeFilename": "src/test/resources/wiki_oracle_cubed.csv", - "momentCubeFilename": "src/test/resources/wiki_moments_cubed.csv", - "attributes": [ - "channel", - "countryName", - "namespace" - ], - "doContainment": true, - "verbose": false -} \ No newline at end of file diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/APLMomentSummarizerBench.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/APLMomentSummarizerBench.java deleted file mode 100644 index 2707c7f62..000000000 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/APLMomentSummarizerBench.java +++ /dev/null @@ -1,149 +0,0 @@ -package edu.stanford.futuredata.macrobase; - -import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.APLExplanation; -import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.APLMomentSummarizer; -import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.APLOutlierSummarizer; -import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics.EstimatedSupportMetric; -import edu.stanford.futuredata.macrobase.datamodel.DataFrame; -import edu.stanford.futuredata.macrobase.datamodel.Schema; -import edu.stanford.futuredata.macrobase.ingest.CSVDataFrameParser; - -import java.io.IOException; -import java.util.*; - -public class APLMomentSummarizerBench { - double minSupport = 0.1; - double percentile = 1.0; - String outlierColumn = "outliers1"; - int numWarmupTrials; - int numTrials; - int numMoments; - String oracleCubeFilename; - String momentCubeFilename; - boolean doContainment; - List attributes; - boolean verbose; - - public APLMomentSummarizerBench(String confFile) throws IOException { - RunConfig conf = RunConfig.fromJsonFile(confFile); - minSupport = conf.get("minSupport"); - percentile = conf.get("percentile"); - outlierColumn = conf.get("outlierColumn"); - numWarmupTrials = conf.get("numWarmupTrials", 10); - numTrials = conf.get("numTrials", 10); - numMoments = conf.get("numMoments", 8); - oracleCubeFilename = conf.get("oracleCubeFilename"); - momentCubeFilename = conf.get("momentCubeFilename"); - doContainment = conf.get("doContainment", false); - attributes = conf.get("attributes"); - verbose = conf.get("verbose", false); - } - - public static void main(String[] args) throws Exception { - String confFile = args[0]; - APLMomentSummarizerBench bench = new APLMomentSummarizerBench(confFile); - bench.run(); - } - - public void run() throws Exception { - System.out.format("minSupport: %f, percentile: %f\n\n", minSupport, percentile); - testOracleOrder3(); - testCubeOrder3(true); - testCubeOrder3(false); - } - - public void testOracleOrder3() throws Exception { - List requiredColumns = new ArrayList<>(attributes); - Map colTypes = new HashMap<>(); - colTypes.put("count", Schema.ColType.DOUBLE); - colTypes.put(outlierColumn, Schema.ColType.DOUBLE); - requiredColumns.add("count"); - requiredColumns.add(outlierColumn); - CSVDataFrameParser loader = new CSVDataFrameParser(oracleCubeFilename, requiredColumns); - loader.setColumnTypes(colTypes); - DataFrame df = loader.load(); - - APLOutlierSummarizer summ = new APLOutlierSummarizer(); - summ.setCountColumn("count"); - summ.setOutlierColumn(outlierColumn); - summ.setMinSupport(minSupport); - summ.setMinRatioMetric(10.0); - summ.setAttributes(attributes); - summ.setDoContainment(doContainment); - summ.onlyUseSupport(true); - for (int i = 0; i < numWarmupTrials; i++) { - summ.process(df); - } - long start = System.nanoTime(); - for (int i = 0; i < numTrials; i++) { - summ.process(df); - } - long timeElapsed = System.nanoTime() - start; - System.out.format("Oracle time: %g\n", timeElapsed / (1.e9 * numTrials)); - APLExplanation e = summ.getResults(); - System.out.format("Num results: %d\n\n", e.getResults().size()); - if (verbose) { - System.out.println(e.prettyPrint()); - } - } - - public void testCubeOrder3(boolean useCascade) throws Exception { - List requiredColumns = new ArrayList<>(attributes); - Map colTypes = new HashMap<>(); - List momentColumns = new ArrayList<>(); - for (int i = 0; i <= numMoments; i++) { - colTypes.put("m" + i, Schema.ColType.DOUBLE); - requiredColumns.add("m" + i); - momentColumns.add("m" + i); - } - colTypes.put("min", Schema.ColType.DOUBLE); - colTypes.put("max", Schema.ColType.DOUBLE); - requiredColumns.add("min"); - requiredColumns.add("max"); - CSVDataFrameParser loader = new CSVDataFrameParser(momentCubeFilename, requiredColumns); - loader.setColumnTypes(colTypes); - DataFrame df = loader.load(); - - APLMomentSummarizer summ = new APLMomentSummarizer(); - summ.setMinSupport(minSupport); - summ.setMinRatioMetric(10.0); - summ.setAttributes(attributes); - summ.setMinColumn("min"); - summ.setMaxColumn("max"); - summ.setMomentColumns(momentColumns); - summ.setPercentile(percentile); - summ.setCascade(useCascade); - summ.setDoContainment(doContainment); - for (int i = 0; i < numWarmupTrials; i++) { - summ.process(df); - } - long start = System.nanoTime(); - for (int i = 0; i < numTrials; i++) { - summ.process(df); - } - long timeElapsed = System.nanoTime() - start; - System.out.format("%s time: %g\n", useCascade ? "Cascade" : "Maxent", timeElapsed / (1.e9 * numTrials)); - if (useCascade) { - EstimatedSupportMetric metric = (EstimatedSupportMetric)summ.qualityMetricList.get(0); - int prunedByNaive = metric.numEnterCascade - metric.numAfterNaiveCheck; - int prunedByMarkov = metric.numAfterNaiveCheck - metric.numAfterMarkovBound; - int prunedByMoments = metric.numAfterMarkovBound - metric.numAfterMomentBound; - System.out.format("Cascade PTR\n\t" + - "Entered cascade: %d\n\t" + - "Pruned by naive checks: %d (%f)\n\t" + - "Pruned by Markov bounds: %d (%f)\n\t" + - "Pruned by moment bounds: %d (%f)\n\t" + - "Reached maxent: %d (%f)\n", - metric.numEnterCascade, - prunedByNaive, prunedByNaive / (double)metric.numEnterCascade, - prunedByMarkov, prunedByMarkov / (double)metric.numEnterCascade, - prunedByMoments, prunedByMoments / (double)metric.numEnterCascade, - metric.numAfterMomentBound, metric.numAfterMomentBound / (double)metric.numEnterCascade); - } - APLExplanation e = summ.getResults(); - System.out.format("Num results: %d\n\n", e.getResults().size()); - if (verbose) { - System.out.println(e.prettyPrint()); - } - } -} \ No newline at end of file diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/RunConfig.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/RunConfig.java deleted file mode 100644 index ce86dd1d4..000000000 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/RunConfig.java +++ /dev/null @@ -1,50 +0,0 @@ -package edu.stanford.futuredata.macrobase; - -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; - -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.IOException; -import java.util.Map; - -public class RunConfig { - private Map values; - - public RunConfig(Map values) { - this.values = values; - } - - public static RunConfig fromJsonFile(String file) throws IOException { - BufferedReader r = new BufferedReader(new FileReader(file)); - ObjectMapper mapper = new ObjectMapper(); - Map map = mapper.readValue( - r, - new TypeReference>() {} - ); - return new RunConfig(map); - } - - public static RunConfig fromJsonString(String json) throws IOException { - ObjectMapper mapper = new ObjectMapper(); - Map map = mapper.readValue( - json, - new TypeReference>() {} - ); - return new RunConfig(map); - } - - @SuppressWarnings("unchecked") - public T get(String key) { - return (T) values.get(key); - } - - @SuppressWarnings("unchecked") - public T get(String key, T defaultValue) { - return (T) values.getOrDefault(key, defaultValue); - } - - public Map getValues() { - return values; - } -} From 0f5d11eb0f734dc1c7e061c521918f20a8549c22 Mon Sep 17 00:00:00 2001 From: Jialin Ding Date: Sun, 18 Feb 2018 17:30:43 -0800 Subject: [PATCH 4/7] Remove dep on msketch --- lib/pom.xml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/pom.xml b/lib/pom.xml index 55df85f61..4138f6a4e 100644 --- a/lib/pom.xml +++ b/lib/pom.xml @@ -81,11 +81,6 @@ jackson-databind 2.8.9 - - futuredata - java-msketch - 1.0-SNAPSHOT - From 4390d0d5683b5aea45ecb220b7194efdaa82b9d9 Mon Sep 17 00:00:00 2001 From: Jialin Ding Date: Sun, 18 Feb 2018 17:37:33 -0800 Subject: [PATCH 5/7] Remove usages of msketch --- .../macrobase/pipeline/CubePipeline.java | 26 ++-- .../summary/aplinear/APLMomentSummarizer.java | 113 --------------- .../summary/aplinear/APrioriLinear.java | 3 - .../metrics/EstimatedGlobalRatioMetric.java | 67 --------- .../metrics/EstimatedSupportMetric.java | 133 ------------------ 5 files changed, 13 insertions(+), 329 deletions(-) delete mode 100644 lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLMomentSummarizer.java delete mode 100644 lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedGlobalRatioMetric.java delete mode 100644 lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedSupportMetric.java diff --git a/core/src/main/java/edu/stanford/futuredata/macrobase/pipeline/CubePipeline.java b/core/src/main/java/edu/stanford/futuredata/macrobase/pipeline/CubePipeline.java index cfa0af29e..3d0a8a114 100644 --- a/core/src/main/java/edu/stanford/futuredata/macrobase/pipeline/CubePipeline.java +++ b/core/src/main/java/edu/stanford/futuredata/macrobase/pipeline/CubePipeline.java @@ -265,19 +265,19 @@ private APLSummarizer getSummarizer(CubeClassifier classifier) throws Exception summarizer.setMinStdDev(minRatioMetric); return summarizer; } - case "moment": { - APLMomentSummarizer summarizer = new APLMomentSummarizer(); - summarizer.setMinColumn(minColumn.orElseThrow( - () -> new MacrobaseException("min column not present in config"))); - summarizer.setMaxColumn(maxColumn.orElseThrow( - () -> new MacrobaseException("max column not present in config"))); - summarizer.setMomentColumns(momentColumns); - summarizer.setAttributes(attributes); - summarizer.setMinSupport(minSupport); - summarizer.setMinRatioMetric(minRatioMetric); - summarizer.setPercentile(cutoff); - return summarizer; - } +// case "moment": { +// APLMomentSummarizer summarizer = new APLMomentSummarizer(); +// summarizer.setMinColumn(minColumn.orElseThrow( +// () -> new MacrobaseException("min column not present in config"))); +// summarizer.setMaxColumn(maxColumn.orElseThrow( +// () -> new MacrobaseException("max column not present in config"))); +// summarizer.setMomentColumns(momentColumns); +// summarizer.setAttributes(attributes); +// summarizer.setMinSupport(minSupport); +// summarizer.setMinRatioMetric(minRatioMetric); +// summarizer.setPercentile(cutoff); +// return summarizer; +// } default: { APLOutlierSummarizer summarizer = new APLOutlierSummarizer(); summarizer.setOutlierColumn(classifier.getOutputColumnName()); diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLMomentSummarizer.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLMomentSummarizer.java deleted file mode 100644 index d210fa2f9..000000000 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLMomentSummarizer.java +++ /dev/null @@ -1,113 +0,0 @@ -package edu.stanford.futuredata.macrobase.analysis.summary.aplinear; - -import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics.*; -import edu.stanford.futuredata.macrobase.datamodel.DataFrame; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.*; -import java.util.stream.IntStream; - -/** - * Summarizer that works over both cube and row-based labeled ratio-based - * outlier summarization. - */ -public class APLMomentSummarizer extends APLSummarizer { - private Logger log = LoggerFactory.getLogger("APLMomentSummarizer"); - private String minColumn = null; - private String maxColumn = null; - private List momentColumns; - private double percentile; - private boolean useCascade = false; - - @Override - public List getAggregateNames() { - ArrayList aggregateNames = new ArrayList<>(); - aggregateNames.add("Minimum"); - aggregateNames.add("Maximum"); - for (int i = 0; i < momentColumns.size(); i++) { - aggregateNames.add("M" + i); - } - return aggregateNames; - } - - @Override - public double[][] getAggregateColumns(DataFrame input) { - double[][] aggregateColumns = new double[2+momentColumns.size()][]; - aggregateColumns[0] = input.getDoubleColumnByName(minColumn); - aggregateColumns[1] = input.getDoubleColumnByName(maxColumn); - for (int i = 0; i < momentColumns.size(); i++) { - aggregateColumns[i+2] = input.getDoubleColumnByName(momentColumns.get(i)); - } - - processCountCol(input, momentColumns.get(0), aggregateColumns[2].length); - return aggregateColumns; - } - - @Override - public Map getAggregationOps() { - Map aggregationOps = new HashMap<>(); - aggregationOps.put("add", IntStream.range(2, 2+momentColumns.size()).toArray()); - aggregationOps.put("min", new int[]{0}); - aggregationOps.put("max", new int[]{1}); - return aggregationOps; - } - - @Override - public List getQualityMetricList() { - List qualityMetricList = new ArrayList<>(); - if (useCascade) { - qualityMetricList.add( - new EstimatedSupportMetric(0, 1, 2, - (100.0 - percentile) / 100.0, 1e-5, true) - ); - } else { - qualityMetricList.add( - new EstimatedSupportMetric(0, 1, 2, - (100.0 - percentile) / 100.0, 1e-5, false) - ); - } - return qualityMetricList; - } - - @Override - public List getThresholds() { - return Arrays.asList(minOutlierSupport); - } - - @Override - public double getNumberOutliers(double[][] aggregates) { - double count = 0.0; - double[] counts = aggregates[2]; - for (int i = 0; i < counts.length; i++) { - count += counts[i]; - } - return count * percentile / 100.0; - } - - public String getMinColumn() { - return minColumn; - } - public void setMinColumn(String minColumn) { - this.minColumn = minColumn; - } - public String getMaxColumn() { - return maxColumn; - } - public void setMaxColumn(String maxColumn) { - this.maxColumn = maxColumn; - } - public List getMomentColumns() { - return momentColumns; - } - public void setMomentColumns(List momentColumns) { - this.momentColumns = momentColumns; - } - public void setPercentile(double percentile) { - this.percentile = percentile; - } - public void setCascade(boolean useCascade) { this.useCascade = useCascade; } - public double getMinRatioMetric() { - return minRatioMetric; - } -} diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java index 768b476ab..cb3edf953 100644 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java @@ -1,6 +1,5 @@ package edu.stanford.futuredata.macrobase.analysis.summary.aplinear; -import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics.EstimatedGlobalRatioMetric; import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics.QualityMetric; import edu.stanford.futuredata.macrobase.analysis.summary.apriori.APrioriSummarizer; import edu.stanford.futuredata.macrobase.analysis.summary.apriori.IntSet; @@ -9,8 +8,6 @@ import org.slf4j.LoggerFactory; import java.util.*; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CountDownLatch; /** diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedGlobalRatioMetric.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedGlobalRatioMetric.java deleted file mode 100644 index 01c5fe410..000000000 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedGlobalRatioMetric.java +++ /dev/null @@ -1,67 +0,0 @@ -package edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics; - -import sketches.MomentSketch; - -import java.util.Arrays; -import java.util.Collections; - -/** - * Measures the relative outlier rate w.r.t. the global outlier rate - */ -public class EstimatedGlobalRatioMetric implements QualityMetric{ - private int minIdx = 0; - private int maxIdx = 1; - private int momentsBaseIdx = 2; - private double baseRate = 0.0; - private double quantile; - private double cutoff; - private double globalCount; - private double tolerance = 1e-10; - - public EstimatedGlobalRatioMetric(int minIdx, int maxIdx, int momentsBaseIdx, - double quantile, double tolerance) { - this.minIdx = minIdx; - this.maxIdx = maxIdx; - this.momentsBaseIdx = momentsBaseIdx; - this.quantile = quantile; - this.tolerance = tolerance; - } - - @Override - public String name() { - return "est_global_ratio"; - } - - @Override - public QualityMetric initialize(double[] globalAggregates) { - globalCount = globalAggregates[momentsBaseIdx]; - MomentSketch ms = new MomentSketch(tolerance); - double[] powerSums = Arrays.copyOfRange(globalAggregates, momentsBaseIdx, globalAggregates.length); - ms.setStats(powerSums, globalAggregates[minIdx], globalAggregates[maxIdx]); - try { - cutoff = ms.getQuantiles(Collections.singletonList(quantile))[0]; - } catch (Exception e) { - cutoff = quantile * (globalAggregates[maxIdx] - globalAggregates[minIdx]) + globalAggregates[minIdx]; - } - baseRate = 1.0 - quantile; - return this; - } - - @Override - public double value(double[] aggregates) { - MomentSketch ms = new MomentSketch(tolerance); - double[] powerSums = Arrays.copyOfRange(aggregates, momentsBaseIdx, aggregates.length); - ms.setStats(powerSums, aggregates[minIdx], aggregates[maxIdx]); - return ms.estimateGreaterThanThreshold(cutoff) / baseRate; - } - - @Override - public boolean canPassThreshold(double[] aggregates, double threshold) { - return true; - } - - @Override - public boolean isMonotonic() { - return false; - } -} diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedSupportMetric.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedSupportMetric.java deleted file mode 100644 index 8fc599027..000000000 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/EstimatedSupportMetric.java +++ /dev/null @@ -1,133 +0,0 @@ -package edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics; - -import sketches.MomentSketch; - -import java.util.Arrays; -import java.util.Collections; - -/** - * Measures the relative outlier rate w.r.t. the global outlier rate - */ -public class EstimatedSupportMetric implements QualityMetric{ - private int minIdx = 0; - private int maxIdx = 1; - private int momentsBaseIdx = 2; - private double quantile; // eg, 0.99 - private double cutoff; - private double globalCount; - private double tolerance = 1e-10; - private boolean useCascade = true; - - // Statistics - public int numEnterCascade = 0; - public int numAfterNaiveCheck = 0; - public int numAfterMarkovBound = 0; - public int numAfterMomentBound = 0; - - public EstimatedSupportMetric(int minIdx, int maxIdx, int momentsBaseIdx, double quantile, - double tolerance, boolean useCascade) { - this.minIdx = minIdx; - this.maxIdx = maxIdx; - this.momentsBaseIdx = momentsBaseIdx; - this.quantile = quantile; - this.tolerance = tolerance; - this.useCascade = useCascade; - } - - @Override - public String name() { - return "est_support"; - } - - @Override - public QualityMetric initialize(double[] globalAggregates) { - globalCount = globalAggregates[momentsBaseIdx] * (1.0 - quantile); - MomentSketch ms = new MomentSketch(tolerance); - double[] powerSums = Arrays.copyOfRange(globalAggregates, momentsBaseIdx, globalAggregates.length); - ms.setStats(powerSums, globalAggregates[0], globalAggregates[1]); - try { - cutoff = ms.getQuantiles(Collections.singletonList(quantile))[0]; - } catch (Exception e) { - cutoff = quantile * (globalAggregates[maxIdx] - globalAggregates[minIdx]) + globalAggregates[minIdx]; - } - return this; - } - - @Override - public double value(double[] aggregates) { - MomentSketch ms = new MomentSketch(tolerance); - double[] powerSums = Arrays.copyOfRange(aggregates, momentsBaseIdx, aggregates.length); - ms.setStats(powerSums, aggregates[minIdx], aggregates[maxIdx]); - return ms.estimateGreaterThanThreshold(cutoff) * aggregates[momentsBaseIdx] / globalCount; - } - - @Override - public Action getAction(double[] aggregates, double threshold) { - if (useCascade) { - return getActionCascade(aggregates, threshold); - } else { - return getActionMaxent(aggregates, threshold); - } - } - - private Action getActionCascade(double[] aggregates, double threshold) { - numEnterCascade++; - double outlierRateNeeded = threshold * globalCount / aggregates[momentsBaseIdx]; - - // Simple checks on min and max - if (aggregates[maxIdx] < cutoff) { - return Action.PRUNE; - } - if (aggregates[minIdx] >= cutoff && outlierRateNeeded <= 1.0) { - return Action.KEEP; - } - numAfterNaiveCheck++; - - // Markov bounds - double mean = aggregates[momentsBaseIdx+1] / aggregates[momentsBaseIdx]; - double min = aggregates[minIdx]; - double max = aggregates[maxIdx]; - double cutoffLowerBound = Math.max(0.0, 1 - (mean - min) / (cutoff - min)); - double cutoffUpperBound = Math.min(1.0, (max - mean) / (max - cutoff)); - double outlierRateUpperBound = 1.0 - cutoffLowerBound; - double outlierRateLowerBound = 1.0 - cutoffUpperBound; - if (outlierRateUpperBound < outlierRateNeeded) { - return Action.PRUNE; - } - if (outlierRateLowerBound >= outlierRateNeeded) { - return Action.KEEP; - } - numAfterMarkovBound++; - - // Moments-based bounds - MomentSketch ms = new MomentSketch(tolerance); - double[] powerSums = Arrays.copyOfRange(aggregates, momentsBaseIdx, aggregates.length); - ms.setStats(powerSums, aggregates[0], aggregates[1]); - double[] bounds = ms.boundGreaterThanThreshold(cutoff); - if (bounds[1] < outlierRateNeeded) { - return Action.PRUNE; - } - if (bounds[0] >= outlierRateNeeded) { - return Action.KEEP; - } - numAfterMomentBound++; - - // Maxent estimate - double outlierRateEstimate = ms.estimateGreaterThanThreshold(cutoff); - return (outlierRateEstimate >= outlierRateNeeded) ? Action.KEEP : Action.PRUNE; - } - - private Action getActionMaxent(double[] aggregates, double threshold) { - double outlierRateNeeded = threshold * globalCount / aggregates[momentsBaseIdx]; - MomentSketch ms = new MomentSketch(tolerance); - double[] powerSums = Arrays.copyOfRange(aggregates, momentsBaseIdx, aggregates.length); - ms.setStats(powerSums, aggregates[minIdx], aggregates[maxIdx]); - double outlierRateEstimate = ms.estimateGreaterThanThreshold(cutoff); - return (outlierRateEstimate >= outlierRateNeeded) ? Action.KEEP : Action.PRUNE; - } - - @Override - public boolean isMonotonic() { - return true; - } -} From 1c803ae2af86c09f28be3330b43aa3abeda636d2 Mon Sep 17 00:00:00 2001 From: Jialin Ding Date: Mon, 19 Feb 2018 00:05:41 -0800 Subject: [PATCH 6/7] Add timers for cascade bench --- .../analysis/summary/aplinear/APLSummarizer.java | 10 +++++----- .../analysis/summary/aplinear/APrioriLinear.java | 12 ++++++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLSummarizer.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLSummarizer.java index 03dae6d84..833fcf1ba 100644 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLSummarizer.java +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APLSummarizer.java @@ -20,12 +20,12 @@ */ public abstract class APLSummarizer extends BatchSummarizer { Logger log = LoggerFactory.getLogger("APLSummarizer"); - AttributeEncoder encoder; - APLExplanation explanation; - APrioriLinear aplKernel; - boolean doContainment = true; + protected AttributeEncoder encoder; + protected APLExplanation explanation; + protected APrioriLinear aplKernel; + protected boolean doContainment = true; public List qualityMetricList; - List thresholds; + protected List thresholds; protected long numEvents = 0; protected long numOutliers = 0; diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java index cb3edf953..7026ba8a6 100644 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java @@ -24,6 +24,10 @@ public class APrioriLinear { private double[] thresholds; private boolean doContainment = true; + public long mergeTime = 0; + public long queryTime = 0; + private long start; + // **Cached values** // Singleton viable sets for quick lookup @@ -64,6 +68,7 @@ public List explain( // Quality metrics are initialized with global aggregates to // allow them to determine the appropriate relative thresholds double[] globalAggregates = new double[numAggregates]; + start = System.nanoTime(); if (aggregationOps == null) { for (int j = 0; j < numAggregates; j++) { globalAggregates[j] = 0; @@ -95,9 +100,12 @@ public List explain( } } } + mergeTime += System.nanoTime() - start; + start = System.nanoTime(); for (QualityMetric q : qualityMetrics) { q.initialize(globalAggregates); } + queryTime += System.nanoTime() - start; // Row store for more convenient access final double[][] aRows = new double[numRows][numAggregates]; @@ -120,6 +128,7 @@ public List explain( threadSetAggregates.add(new HashMap<>()); } final CountDownLatch doneSignal = new CountDownLatch(numThreads); + start = System.nanoTime(); for (int threadNum = 0; threadNum < numThreads; threadNum++) { final int startIndex = (numRows * threadNum) / numThreads; final int endIndex = (numRows * (threadNum + 1)) / numThreads; @@ -192,6 +201,7 @@ public List explain( } } } + mergeTime += System.nanoTime() - start; HashSet curOrderNext = new HashSet<>(); HashSet curOrderSaved = new HashSet<>(); @@ -199,11 +209,13 @@ public List explain( for (IntSet curCandidate: setAggregates.keySet()) { double[] curAggregates = setAggregates.get(curCandidate); QualityMetric.Action action = QualityMetric.Action.KEEP; + start = System.nanoTime(); for (int i = 0; i < qualityMetrics.length; i++) { QualityMetric q = qualityMetrics[i]; double t = thresholds[i]; action = QualityMetric.Action.combine(action, q.getAction(curAggregates, t)); } + queryTime += System.nanoTime() - start; if (action == QualityMetric.Action.KEEP) { // if a set is already past the threshold on all metrics, // save it and no need for further exploration if we do containment From f98a44d188108cbafb828a5325bb0b5673212733 Mon Sep 17 00:00:00 2001 From: Edward Gan Date: Sun, 25 Feb 2018 11:55:18 -0800 Subject: [PATCH 7/7] initial aggregation ops --- .../summary/aplinear/APrioriLinear.java | 38 ++++------------- .../aplinear/metrics/AggregationOp.java | 41 +++++++++++++++++++ 2 files changed, 49 insertions(+), 30 deletions(-) create mode 100644 lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/AggregationOp.java diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java index 7026ba8a6..82b2617e3 100644 --- a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/APrioriLinear.java @@ -1,5 +1,6 @@ package edu.stanford.futuredata.macrobase.analysis.summary.aplinear; +import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics.AggregationOp; import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics.QualityMetric; import edu.stanford.futuredata.macrobase.analysis.summary.apriori.APrioriSummarizer; import edu.stanford.futuredata.macrobase.analysis.summary.apriori.IntSet; @@ -60,7 +61,7 @@ public List explain( public List explain( final List attributes, double[][] aggregateColumns, - Map aggregationOps + AggregationOp[] aggregationOps ) { final int numAggregates = aggregateColumns.length; final int numRows = aggregateColumns[0].length; @@ -69,35 +70,12 @@ public List explain( // allow them to determine the appropriate relative thresholds double[] globalAggregates = new double[numAggregates]; start = System.nanoTime(); - if (aggregationOps == null) { - for (int j = 0; j < numAggregates; j++) { - globalAggregates[j] = 0; - double[] curColumn = aggregateColumns[j]; - for (int i = 0; i < numRows; i++) { - globalAggregates[j] += curColumn[i]; - } - } - } else { - for (int j : aggregationOps.getOrDefault("add", new int[0])) { - globalAggregates[j] = 0; - double[] curColumn = aggregateColumns[j]; - for (int i = 0; i < numRows; i++) { - globalAggregates[j] += curColumn[i]; - } - } - for (int j : aggregationOps.getOrDefault("min", new int[0])) { - double[] curColumn = aggregateColumns[j]; - globalAggregates[j] = curColumn[0]; - for (int i = 0; i < numRows; i++) { - globalAggregates[j] = Math.min(globalAggregates[j], curColumn[i]); - } - } - for (int j : aggregationOps.getOrDefault("max", new int[0])) { - double[] curColumn = aggregateColumns[j]; - globalAggregates[j] = curColumn[0]; - for (int i = 0; i < numRows; i++) { - globalAggregates[j] = Math.max(globalAggregates[j], curColumn[i]); - } + for (int j = 0; j < numAggregates; j++) { + AggregationOp curOp = aggregationOps[j]; + globalAggregates[j] = curOp.initValue(); + double[] curColumn = aggregateColumns[j]; + for (int i = 0; i < numRows; i++) { + globalAggregates[j] = curOp.combine(globalAggregates[j], curColumn[i]); } } mergeTime += System.nanoTime() - start; diff --git a/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/AggregationOp.java b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/AggregationOp.java new file mode 100644 index 000000000..85c9116ce --- /dev/null +++ b/lib/src/main/java/edu/stanford/futuredata/macrobase/analysis/summary/aplinear/metrics/AggregationOp.java @@ -0,0 +1,41 @@ +package edu.stanford.futuredata.macrobase.analysis.summary.aplinear.metrics; + +import edu.stanford.futuredata.macrobase.util.MacrobaseInternalError; + +public enum AggregationOp { + SUM, MIN, MAX; + + public double combine(double a, double b) { + switch(this) { + case SUM: { + return a+b; + } + case MIN: { + return a < b ? a : b; + } + case MAX: { + return a > b ? a : b; + } + default: { + throw new MacrobaseInternalError("Invalid Aggregation Op"); + } + } + } + + public double initValue() { + switch(this) { + case SUM: { + return 0; + } + case MIN: { + return Double.MAX_VALUE; + } + case MAX: { + return -Double.MAX_VALUE; + } + default: { + throw new MacrobaseInternalError("Invalid Aggregation Op"); + } + } + } +}