From 62a3c488cb86f6dd19ed2a47acf902ebc4d3c273 Mon Sep 17 00:00:00 2001 From: gj Date: Sat, 8 Sep 2018 21:35:41 +0800 Subject: [PATCH] docs work for 0.1.9 --- README.rst | 2 +- docs/bot/exception.rst | 12 ----- docs/bot/index.rst | 22 ++++++-- docs/change/0.1.9.rst | 69 +++++++++++++++++++++++++ docs/change/index.rst | 1 + docs/index.rst | 15 +++--- docs/{node/index.rst => node.rst} | 79 ++++++++++++++++++++++++++--- docs/node/db.rst | 2 - docs/node/file.rst | 2 - docs/node/http.rst | 36 ------------- docs/node/util.rst | 17 ------- docs/{pipe/index.rst => pipe.rst} | 0 docs/pipe/application.rst | 2 - docs/{route/index.rst => route.rst} | 27 +++++++--- docs/route/Branch.rst | 15 ------ docs/route/Fork.rst | 2 - examples/crawler.py | 42 ++++++--------- 17 files changed, 207 insertions(+), 138 deletions(-) delete mode 100644 docs/bot/exception.rst create mode 100644 docs/change/0.1.9.rst rename docs/{node/index.rst => node.rst} (54%) delete mode 100644 docs/node/db.rst delete mode 100644 docs/node/file.rst delete mode 100644 docs/node/http.rst delete mode 100644 docs/node/util.rst rename docs/{pipe/index.rst => pipe.rst} (100%) delete mode 100644 docs/pipe/application.rst rename docs/{route/index.rst => route.rst} (74%) delete mode 100644 docs/route/Branch.rst delete mode 100644 docs/route/Fork.rst diff --git a/README.rst b/README.rst index 8716bcf..efae461 100644 --- a/README.rst +++ b/README.rst @@ -113,7 +113,7 @@ below is the flow graph generated by Botflow.Aggreate 6 exchanges bitcoin price - **Fast** Nodes will be run in parallel, and they will perform well when processing stream data. 
-:Web Crawle: Botflow is 5x fatter than Scrapy +:Web Crawler: Botflow is 10x faster than Scrapy diff --git a/docs/bot/exception.rst b/docs/bot/exception.rst deleted file mode 100644 index df3db07..0000000 --- a/docs/bot/exception.rst +++ /dev/null @@ -1,12 +0,0 @@ -Exception ---------- - - -Exception behavior will act according to ```config.Exception_policy = config.Exception_raise ``` setting. - -:Exception_default: default exception policy is raise -:Exception_raise: raise exception -:Exception_ignore: ignore exception. exception raised from node will be suppressed. -:Exception_retry: the value will put in input-queue after some delay. -:Exception_pipein: the exception tread as returen value ,put in output queue. it will be usefull - in blockedjoin route scenarios. \ No newline at end of file diff --git a/docs/bot/index.rst b/docs/bot/index.rst index e4f9832..4f0546f 100644 --- a/docs/bot/index.rst +++ b/docs/bot/index.rst @@ -1,4 +1,4 @@ -Bot BotFrame +Bot ============ @@ -7,13 +7,27 @@ Bot BotFrame .. toctree:: :maxdepth: 1 - run replay +.. contents:: + :local: -Exception policy ----------------- +Run +--- + +Exception +--------- + + +Exception behavior will act according to ```config.Exception_policy = config.Exception_raise ``` setting. + +:Exception_default: default exception policy is raise +:Exception_raise: raise exception +:Exception_ignore: ignore exception. exception raised from node will be suppressed. +:Exception_retry: the value will be put in the input queue after some delay. +:Exception_pipein: the exception is treated as a return value and put in the output queue. It is useful + in blockedjoin route scenarios. How to debug diff --git a/docs/change/0.1.9.rst b/docs/change/0.1.9.rst new file mode 100644 index 0000000..dda2591 --- /dev/null +++ b/docs/change/0.1.9.rst @@ -0,0 +1,69 @@ +Version 0.1.9 +============= + + +#. Officially rename project to Botflow. + +#. Enable HTTP server support. A Pipe can work as a coroutine to integrate with other asyncio frameworks. 
+ + .. code-block:: python + + from botflow import * + from aiohttp import web + + p = Pipe( + {"msg":"hello world!"} + ) + app = web.Application() + + app.add_routes([ + web.get('/', p.aiohttp_json_handle) + ]) + + Bot.run_app(app) + + +#. Add new Route "SendTo". It can be used to redirect the data flow to a Node. + + .. code-block:: python + + + def filter(url): + global count + if 'http' not in url: + url = "http://127.0.0.1:8080{}".format(url) + + if url in seen : #filter out processed links + return None + seen.add(url) + return url + + + def find_all_links(r): + for a in r.soup.find_all('a', href=True): + yield a.get('href') + + + + + b = Return( + + filter, + HttpLoader(), + find_all_links, + ) + + Pipe( + "http://127.0.0.1:8080/", + b, + SendTo(b), + + ) + +#. Add new Node types "SpeedLimit" and "Delay" for speed control. + +#. Add new Node type "Zip" to zip items from multiple flows into a list. + +#. Rewrite the whole project to make the code more readable. + +#. Improve flow graph performance by reducing the number of nodes. \ No newline at end of file diff --git a/docs/change/index.rst b/docs/change/index.rst index 8434fb5..26d9bef 100644 --- a/docs/change/index.rst +++ b/docs/change/index.rst @@ -4,4 +4,5 @@ Change .. toctree:: :maxdepth: 1 + 0.1.9 0.1.8 \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index ec61014..fb75722 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,16 +6,17 @@ Welcome to Botflow's world! -The Botflow applicaton is made with one or many pipes and run by ```BotFrame.run()```. +The Botflow application is made with one or many pipes and run by ```Bot.run()```. The simplest applicaton looks like: .. code-block:: python + from botflow import * Pipe(print) - BotFrame.run() + Bot.run() -Concept of the Botflow is very simple. ```I doubt if it is good to write a guide :-)``` +The concept of Botflow is very simple. **Pipe** works at the Top level. It combines the Node and Route together. 
@@ -75,10 +76,10 @@ The above two code blocks look like pseudo code, but they are workable samples. :caption: Contents: bot/index - pipe/index - node/index - route/index - change/0.1.8 + pipe + node + route + change/index .. toctree:: :maxdepth: 1 diff --git a/docs/node/index.rst b/docs/node.rst similarity index 54% rename from docs/node/index.rst rename to docs/node.rst index 9b642b6..d59b409 100644 --- a/docs/node/index.rst +++ b/docs/node.rst @@ -1,6 +1,9 @@ Node ==== +.. contents:: + :local: + Node is callable thing .In python world ,we have three callable things , - function @@ -46,12 +49,76 @@ Exception behavior will act according to ```config.Exception_policy = config.Exc :local: -.. toctree:: - :maxdepth: 1 +Timer +------ + + It sends a message into the pipe on a timer, controlled by the delay, max_time and until parameters. + +```Timer(self, delay=1, max_time=None, until=None)``` + + +:delay: the delay between every count +:max_time: will stop when the max count is reached. +:until: the function ref. Timer will count until the function returns True. + + +HttpLoader +----------- + + Get a url and return the HTTP response + + Init parameter + -------------- + + :timeout: + default timeout=20 . + + Callable parameter: + ------------------- + + can be called with a string (url) or an HttpRequest. + + HttpResponse + ------------ + HttpResponse + + + :json: return a json object + :text: get text body of the httpresponse + :xml: get lxml object + :css: get css object + + + HttpRequest + ------------ + + :head: + :body: + :url: + :proxy: + + + +AioFile +---------------- + for file I/O. + +SpeedLimit +---------- + + limit the stream speed + +Delay +------ + + delay for the specified number of seconds. +Zip +----- + Wait for all branches to finish and merge the results into a tuple. 
+ +Filter +------ - http - db - file - util + Drop data from pipe if it does not match some condition diff --git a/docs/node/db.rst b/docs/node/db.rst deleted file mode 100644 index c98ca14..0000000 --- a/docs/node/db.rst +++ /dev/null @@ -1,2 +0,0 @@ -DB -========= \ No newline at end of file diff --git a/docs/node/file.rst b/docs/node/file.rst deleted file mode 100644 index 0350664..0000000 --- a/docs/node/file.rst +++ /dev/null @@ -1,2 +0,0 @@ -File -==== \ No newline at end of file diff --git a/docs/node/http.rst b/docs/node/http.rst deleted file mode 100644 index b724bc4..0000000 --- a/docs/node/http.rst +++ /dev/null @@ -1,36 +0,0 @@ -HTTP -==== - -.. contents:: - :local: - - -Init parameter --------------- - -:timeout: - default timeout=20 . - -Callable parameter: ------------------- - -can be call by string (url), and Httprequest. - -HttpResponse ------------- -HttpResponse - - -:json: return a json object -:text: get text body of the httpresponse -:xml: get lxml object -:css: get css object - - -HttpRequest ------------- - -:head: -:body: -:url: -:proxy: diff --git a/docs/node/util.rst b/docs/node/util.rst deleted file mode 100644 index 066cb27..0000000 --- a/docs/node/util.rst +++ /dev/null @@ -1,17 +0,0 @@ -Timer -==== - -```Timer(self, delay=1, max_time=None, until=None)``` - - -:dealy: the delay between every count -:max_time: will stop when reach the max count time. -:until: the function ref. Timer will count until function return True. 
- - - -Loop -==== - - -'''Loop(it)''' \ No newline at end of file diff --git a/docs/pipe/index.rst b/docs/pipe.rst similarity index 100% rename from docs/pipe/index.rst rename to docs/pipe.rst diff --git a/docs/pipe/application.rst b/docs/pipe/application.rst deleted file mode 100644 index dd28cc8..0000000 --- a/docs/pipe/application.rst +++ /dev/null @@ -1,2 +0,0 @@ -application -========== \ No newline at end of file diff --git a/docs/route/index.rst b/docs/route.rst similarity index 74% rename from docs/route/index.rst rename to docs/route.rst index 5634c70..c717fea 100644 --- a/docs/route/index.rst +++ b/docs/route.rst @@ -31,6 +31,22 @@ Return ------ Route Return is derived from Branch with parameter (share=False,join=True) + Branch,Return,Filter + ==================== + + + .. image:: Botflow_branch.jpg + :width: 300 + + + + :share: True|False. if keep the original data for parent pipe + + :route_type: list of Type for route upflow message to Branch + + :join: True|False ,if Return the final message to parent Pipe. + + Filter ------ @@ -38,12 +54,11 @@ Filter sametime. -Fork +Join ---- - Basic route of Botflow. it create branch for every pass in (node or route). - + Join is derived from Fork with parameter(share=False,join=True) . -Join ----- - Join is derived from Fork with parameter(share=False,join=True) . \ No newline at end of file +SendTo +------ + Send the stream to the specified Node. diff --git a/docs/route/Branch.rst b/docs/route/Branch.rst deleted file mode 100644 index 6dbc6c9..0000000 --- a/docs/route/Branch.rst +++ /dev/null @@ -1,15 +0,0 @@ -Branch,Return,Filter -==================== - - -.. image:: Botflow_branch.jpg - :width: 300 - - - -:share: True|False. if keep the orignal data for parent pipe - -:route_type: list of Type for route upflow message to Branch - -:join: True|False ,if Return the final message to parent Pipe. 
- diff --git a/docs/route/Fork.rst b/docs/route/Fork.rst deleted file mode 100644 index b8afc2e..0000000 --- a/docs/route/Fork.rst +++ /dev/null @@ -1,2 +0,0 @@ -Fork,Join -========= \ No newline at end of file diff --git a/examples/crawler.py b/examples/crawler.py index 42d63b5..467c538 100644 --- a/examples/crawler.py +++ b/examples/crawler.py @@ -1,38 +1,36 @@ import logging - - -#logging.basicConfig(level=logging.DEBUG) -logger=logging.getLogger(__name__) from botflow import * from botflow.route import SendTo from botflow.config import config -from botflow.node import Delay,Node,SpeedLimit -from botflow.queue import QueueManager -config.coroutine_batch_size = 8 -config.default_queue_max_size=0 +import datetime -import datetime + +config.default_queue_max_size = 0 +# logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + start = datetime.datetime.now() seen = set() -to_do = set() count = 1 + def print_speed(): end = datetime.datetime.now() - s=(end-start).total_seconds() + s = (end - start).total_seconds() print(f"count {count} time {s} speed{count/s}") # QueueManager().debug_print() -def fitler(url): + +def filter_out(url): global count if 'http' not in url: url = "http://127.0.0.1:8080{}".format(url) - if url in seen : + if url in seen: return None count += 1 @@ -44,33 +42,25 @@ def fitler(url): return url -def perf_parse(r): +def find_all_links(r): for a in r.soup.find_all('a', href=True): yield a.get('href') - - -# 0:00:46.989379 是否拆分,区别不大 b = Return( - fitler, + filter_out, HttpLoader(), - perf_parse, - - - - + find_all_links, ) - Pipe( "http://127.0.0.1:8080/", b, SendTo(b), ) -BotFlow.render('ex_output/crawler') +Bot.render('ex_output/crawler') try: @@ -85,4 +75,4 @@ def perf_parse(r): raise BotFlow.debug_print() print_speed() -BotFlow.stop() \ No newline at end of file +BotFlow.stop()