diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c678a5e --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..18c24a9 --- /dev/null +++ b/Pipfile @@ -0,0 +1,12 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +requests-html = "*" + +[dev-packages] + +[requires] +python_version = "3.9" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..8c311ec --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,232 @@ +{ + "_meta": { + "hash": { + "sha256": "992b7fd81898db822d0416144b6720a0f5f3801c56ded482ae0f796a208dd988" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.9" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "appdirs": { + "hashes": [ + "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", + "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128" + ], + "version": "==1.4.4" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35", + "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25", + "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666" + ], + "version": "==4.9.3" + }, + "bs4": { + "hashes": [ + "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a" + ], + "version": "==0.0.1" + }, + "certifi": { + "hashes": [ + "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", + "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" + ], + "version": "==2020.12.5" + }, + "chardet": { + "hashes": [ + "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", + "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" + ], + "markers": 
"python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==4.0.0" + }, + "cssselect": { + "hashes": [ + "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf", + "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.1.0" + }, + "fake-useragent": { + "hashes": [ + "sha256:c104998b750eb097eefc28ae28e92d66397598d2cf41a31aa45d5559ef1adf35" + ], + "version": "==0.1.11" + }, + "idna": { + "hashes": [ + "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", + "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.10" + }, + "lxml": { + "hashes": [ + "sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d", + "sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3", + "sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2", + "sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f", + "sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927", + "sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3", + "sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7", + "sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f", + "sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade", + "sha256:39b78571b3b30645ac77b95f7c69d1bffc4cf8c3b157c435a34da72e78c82468", + "sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b", + "sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4", + "sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83", + "sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04", + 
"sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791", + "sha256:74f7d8d439b18fa4c385f3f5dfd11144bb87c1da034a466c5b5577d23a1d9b51", + "sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1", + "sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a", + "sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f", + "sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee", + "sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec", + "sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969", + "sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28", + "sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a", + "sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa", + "sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106", + "sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d", + "sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4", + "sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0", + "sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4", + "sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2", + "sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0", + "sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654", + "sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2", + "sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23", + "sha256:f90ba11136bfdd25cae3951af8da2e95121c9b9b93727b1b896e3fa105b2f586" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==4.6.3" + }, + "parse": { + "hashes": [ + "sha256:9ff82852bcb65d139813e2a5197627a94966245c897796760a3a2a8eb66f020b" + ], + "version": "==1.19.0" + }, + "pyee": { + "hashes": [ + 
"sha256:383973b63ad7ed5e3c0311f8b179c52981f9e7b3eaea0e9a830d13ec34dde65f", + "sha256:92dacc5bd2bdb8f95aa8dd2585d47ca1c4840e2adb95ccf90034d64f725bfd31" + ], + "version": "==8.1.0" + }, + "pyppeteer": { + "hashes": [ + "sha256:c2974be1afa13b17f7ecd120d265d8b8cd324d536a231c3953ca872b68aba4af", + "sha256:d4cb4a5ef94b00c1073aed888b39646ce26cff3339cff7a3f1f1cc307bf50408" + ], + "markers": "python_full_version >= '3.6.1' and python_full_version < '4.0.0'", + "version": "==0.2.5" + }, + "pyquery": { + "hashes": [ + "sha256:1fc33b7699455ed25c75282bc8f80ace1ac078b0dda5a933dacbd8b1c1f83963", + "sha256:a388eefb6bc4a55350de0316fbd97cda999ae669b6743ae5b99102ba54f5aa72" + ], + "version": "==1.4.3" + }, + "requests": { + "hashes": [ + "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", + "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==2.25.1" + }, + "requests-html": { + "hashes": [ + "sha256:7e929ecfed95fb1d0994bb368295d6d7c4d06b03fcb900c33d7d0b17e6003947", + "sha256:cb8a78cf829c4eca9d6233f28524f65dd2bfaafb4bdbbc407f0a0b8f487df6e2" + ], + "index": "pypi", + "version": "==0.10.0" + }, + "six": { + "hashes": [ + "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", + "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "version": "==1.15.0" + }, + "soupsieve": { + "hashes": [ + "sha256:052774848f448cf19c7e959adf5566904d525f33a3f8b6ba6f6f8f26ec7de0cc", + "sha256:c2c1c2d44f158cdbddab7824a9af8c4f83c76b1e23e049479aa432feb6c4c23b" + ], + "markers": "python_version >= '3.0'", + "version": "==2.2.1" + }, + "tqdm": { + "hashes": [ + "sha256:daec693491c52e9498632dfbe9ccfc4882a557f5fa08982db1b4d3adbe0887c3", + "sha256:ebdebdb95e3477ceea267decfc0784859aa3df3e27e22d23b83e9b272bf157ae" + ], + "markers": 
"python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==4.60.0" + }, + "urllib3": { + "hashes": [ + "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df", + "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_full_version < '4.0.0'", + "version": "==1.26.4" + }, + "w3lib": { + "hashes": [ + "sha256:0161d55537063e00d95a241663ede3395c4c6d7b777972ba2fd58bbab2001e53", + "sha256:0ad6d0203157d61149fd45aaed2e24f53902989c32fc1dccc2e2bfba371560df" + ], + "version": "==1.22.0" + }, + "websockets": { + "hashes": [ + "sha256:0e4fb4de42701340bd2353bb2eee45314651caa6ccee80dbd5f5d5978888fed5", + "sha256:1d3f1bf059d04a4e0eb4985a887d49195e15ebabc42364f4eb564b1d065793f5", + "sha256:20891f0dddade307ffddf593c733a3fdb6b83e6f9eef85908113e628fa5a8308", + "sha256:295359a2cc78736737dd88c343cd0747546b2174b5e1adc223824bcaf3e164cb", + "sha256:2db62a9142e88535038a6bcfea70ef9447696ea77891aebb730a333a51ed559a", + "sha256:3762791ab8b38948f0c4d281c8b2ddfa99b7e510e46bd8dfa942a5fff621068c", + "sha256:3db87421956f1b0779a7564915875ba774295cc86e81bc671631379371af1170", + "sha256:3ef56fcc7b1ff90de46ccd5a687bbd13a3180132268c4254fc0fa44ecf4fc422", + "sha256:4f9f7d28ce1d8f1295717c2c25b732c2bc0645db3215cf757551c392177d7cb8", + "sha256:5c01fd846263a75bc8a2b9542606927cfad57e7282965d96b93c387622487485", + "sha256:5c65d2da8c6bce0fca2528f69f44b2f977e06954c8512a952222cea50dad430f", + "sha256:751a556205d8245ff94aeef23546a1113b1dd4f6e4d102ded66c39b99c2ce6c8", + "sha256:7ff46d441db78241f4c6c27b3868c9ae71473fe03341340d2dfdbe8d79310acc", + "sha256:965889d9f0e2a75edd81a07592d0ced54daa5b0785f57dc429c378edbcffe779", + "sha256:9b248ba3dd8a03b1a10b19efe7d4f7fa41d158fdaa95e2cf65af5a7b95a4f989", + "sha256:9bef37ee224e104a413f0780e29adb3e514a5b698aabe0d969a6ba426b8435d1", + "sha256:c1ec8db4fac31850286b7cd3b9c0e1b944204668b8eb721674916d4e28744092", 
+ "sha256:c8a116feafdb1f84607cb3b14aa1418424ae71fee131642fc568d21423b51824", + "sha256:ce85b06a10fc65e6143518b96d3dca27b081a740bae261c2fb20375801a9d56d", + "sha256:d705f8aeecdf3262379644e4b55107a3b55860eb812b673b28d0fbc347a60c55", + "sha256:e898a0863421650f0bebac8ba40840fc02258ef4714cb7e1fd76b6a6354bda36", + "sha256:f8a7bff6e8664afc4e6c28b983845c5bc14965030e3fb98789734d416af77c4b" + ], + "markers": "python_full_version >= '3.6.1'", + "version": "==8.1" + } + }, + "develop": {} +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..d05be9d --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +# LowestCommonAncestorExtractor + +A python library for the structured extraction of content from German and English Terms and Conditions. Developed by [Tobias Schamel](https://wwwmatthes.in.tum.de/pages/665u6pdbc45i/Bachelor-s-Thesis-Tobias-Schamel) as part of the [AGB-Check](project) project. + +For citation, please use: +``` +@InProceedings{schamel-EtAl:2022:ECNLP, + author = {Schamel, Tobias and Braun, Daniel and Matthes, Florian}, + title = {Structured Extraction of Terms and Conditions from German and English Online Shops}, + booktitle = {Proceedings of The Fifth Workshop on e-Commerce and NLP (ECNLP 5)}, + month = {May}, + year = {2022}, + address = {Dublin, Ireland}, + publisher = {Association for Computational Linguistics} +} + +``` + +## License + +The software is provided under the MIT license. + +## Acknowledgements + +The project was supported by funds of the Federal Ministry for the Environment, Nature Conservation, +Nuclear Safety and Consumer Protection (BMUV) based on a decision of the Parliament of the Federal +Republic of Germany via the Federal Office for Agriculture and Food (BLE) under the innovation +support programme. 
# Extracting the main content using the method presented in the thesis.
# The style covering the most characters is determined first; the lowest
# common ancestor covering at least 'threshold' of that style becomes the
# main-content node, which is then stripped of empty nodes.
# Attributes: body = body HTML node; contentExtractor = content extraction method;
# threshold = minimum coverage for main content node.
def getMainContent(bodyDOMNode, contentExtractor, threshold):
    frequencies = getFrequencyOfStyles(bodyDOMNode, contentExtractor)
    dominantStyle = max(frequencies, key=frequencies.get)
    mainContent = getLowestCommonAncestorNodeOfStyle(
        bodyDOMNode, dominantStyle, frequencies[dominantStyle],
        threshold, contentExtractor)

    # Clean main content from empty nodes and other
    cleanMainContent(mainContent)
    return mainContent
# Calculate the depth of the subtree expanded by the given 'node'.
# A node without children has depth 0; otherwise the depth is one more
# than the deepest child subtree.
# Attributes: node = node, of which subtree is investigated
def getDepth(node):
    if not node.children:
        return 0
    # The original's "1 if len(depths) == 0" guard was dead code: the
    # children list is known to be non-empty on this path.
    return 1 + max(getDepth(child) for child in node.children)
# Find lowest common ancestor of a given style covering at least 'threshhold' (%)
# of all occurrences of the given style ('noOfCharacters') in the document
# (i.e. in tree expanded by 'rootNode'). Whenever there is no such node,
# extract a maximum subsequence of direct children to the body containing the style.
# Attributes: rootNode = root node of whole content tree; style = most common style;
# noOfCharacters = number of total (valid) characters for the style;
# threshhold = minimum coverage for main content node; contentExtractor = content
# extraction method
def getLowestCommonAncestorNodeOfStyle(rootNode, style, noOfCharacters, threshhold, contentExtractor):
    candidates = []
    getLowestCommonAncestorNodeOfStyleRec(
        rootNode, style, noOfCharacters, 1, candidates, contentExtractor)

    # Scan from the deepest level upwards; the first sufficiently covering
    # node at the deepest possible level wins.
    for depth in range(getDepth(rootNode), 0, -1):
        for node, coverage, nodeDepth in candidates:
            if nodeDepth == depth and coverage > threshhold:
                return node

    # Fallback: longest run of direct children that contain the style at all.
    # NOTE(review): largest *relevant character* subsequence might be better —
    # kept as a run-of-children heuristic, as in the thesis implementation.
    bitmap = [style in getFrequencyOfStyles(child, contentExtractor)
              for child in rootNode.children]
    run = findMaximumSubsequenceOfTrue(bitmap)

    synthetic = TreeElement()
    synthetic.children = [rootNode.children[i] for i in run]
    synthetic.tag = 'body'
    synthetic.xpath = '/html/body'
    return synthetic
# Calculate the number of (valid, according to used content extractor) characters in a node.
# Only direct TextElement children are counted; for the *ShortTextExclusion
# extractors, fragments of three words or fewer are skipped.
# Attributes: node = node investigated; contentExtractor = content extraction method
def numberOfCharacters(node, contentExtractor):
    total = 0
    if contentExtractor in (ContentExtractor.RenderedStyle, ContentExtractor.NaiveStyle):
        for child in node.children:
            if isinstance(child, TextElement) and child.text is not None:
                total += len(child.text)
    elif contentExtractor in (ContentExtractor.NaiveStyleAndShortTextExclusion,
                              ContentExtractor.RenderedStyleAndShortTextExclusion):
        # BUG FIX: the original condition was
        # `... == NaiveStyleAndShortTextExclusion or RenderedStyleAndShortTextExclusion`,
        # whose right operand is an always-truthy enum member, so this branch
        # swallowed every extractor value that missed the first test.
        for child in node.children:
            if (isinstance(child, TextElement) and child.text is not None
                    and len(child.text.split()) > 3):
                total += len(child.text)
    return total
# Search for the range with the maximum subsequence of 'True' in a bitmap.
# Ties are won by the earliest run; the result is a (possibly empty) range
# of indices into the bitmap.
# Attributes: bitmap = bitmap to be investigated
def findMaximumSubsequenceOfTrue(bitmap):
    # Renamed parameter: the original shadowed the builtin `list`.
    best = range(0, 0)
    runStart = 0
    runLength = 0
    for index, flag in enumerate(bitmap):
        if flag:
            runLength += 1
            # Strict '<' keeps the first maximal run on ties.
            if len(best) < runLength:
                best = range(runStart, runStart + runLength)
        else:
            runStart = index + 1
            runLength = 0
    return best
# Parse the HTML doc downloaded by Selenium.
# Builds a TreeElement mirror of the <body> element; CSS style information is
# rendered along the way only when extractStyle is set.
# Attributes: driver = Selenium driver; extractStyle = whether style is rendered in this step
def parseTree(driver, extractStyle):
    tree = etree.parse(StringIO(driver.page_source), etree.HTMLParser())
    body = tree.xpath('/html/body')[0]
    bodyXpath = tree.getpath(body)

    result = TreeElement()
    result.tag = body.tag
    result.xpath = bodyXpath
    if extractStyle:
        result.style = parseStyle(driver, bodyXpath)
    result.children = parseSeleniumTreeRec(
        driver, tree, body.xpath('child::node()'), extractStyle)
    return result
# Cut spaces (ASCII 32 only — tabs etc. are kept) at begin and end from text.
# Attributes: text = input string
def cutStartAndEndWhitespaces(text):
    # str.strip(' ') replaces the hand-rolled index scan and also fixes the
    # original's corner case: a string consisting only of spaces was returned
    # unchanged instead of empty.
    return text.strip(' ')
# Checks, whether a TextElement node has no content.
# A character counts as content if its code point is above 32, i.e. it is
# neither a space nor a control character (newlines count as empty).
# Attributes: textNode = TextElement which is investigated
def hasNoContent(textNode):
    # any() short-circuits on the first content character, unlike the
    # original loop which always scanned the full string.
    return not any(ord(c) > 32 for c in textNode.text)
class Font:
    """Value object describing the rendered style of a text fragment.

    Two Font instances compare equal iff weight, underline flag, size and
    family all match, so Font objects can serve as dictionary keys
    (e.g. for style frequency counting).
    """

    def __init__(self, size, weight, underlined, fontFamily):
        self.weight = weight            # numeric CSS font-weight
        self.isUnderlined = underlined  # True if text-decoration contains 'underline'
        self.fontSize = size            # font size (px value as float)
        self.fontFamily = fontFamily    # CSS font-family string

    def __str__(self):
        # Kept intentionally empty to preserve the original behaviour.
        return ""

    def __repr__(self):
        # Debug-friendly representation (the original had none).
        return (f"Font(size={self.fontSize!r}, weight={self.weight!r}, "
                f"underlined={self.isUnderlined!r}, fontFamily={self.fontFamily!r})")

    def __eq__(self, other):
        # BUG FIX: the original only guarded against None and raised
        # AttributeError for any other non-Font operand. NotImplemented lets
        # Python fall back to identity, so Font == None is still False.
        if not isinstance(other, Font):
            return NotImplemented
        return (self.weight == other.weight
                and self.isUnderlined == other.isUnderlined
                and self.fontFamily == other.fontFamily
                and self.fontSize == other.fontSize)

    def __hash__(self):
        # Tuple hash mixes the fields better than the original XOR, which
        # collided whenever two fields hashed to the same value.
        return hash((self.weight, self.isUnderlined, self.fontSize, self.fontFamily))
# Retrieve numeration from given text.
# Recognises decimal, roman and alphabetic counters (optionally nested with
# separators like '.', '-', ',' or ':', e.g. "1.2)" or "§ 3"), normalises
# them to dot-separated levels and returns them translated by translateNums.
# An empty list means no numeration was found.
def getNumeration(txt):
    # Raw strings: the original non-raw pattern relied on escape sequences
    # like '\s' and '\.' which newer Python versions flag as invalid escapes.
    possibleNums = re.search(
        r"\s[\(§]?(([IVXLivxl]{1,7})|([0-9]{1,2})|[a-zA-Z])"
        r"([\.\-,:](([IVXLivxl]{1,7})|([0-9]{1,2})|[a-zA-Z]))*[\-:\.)]?\s",
        ' ' + txt + ' ')
    if possibleNums is None:
        return []

    # Normalise every supported separator to '.' and drop decoration
    # ('(' and '§') in a single pass instead of eight chained str.replace
    # calls; the trailing whitespace of the match is cut first.
    normalized = possibleNums[0][:-1].translate(str.maketrans(
        {'-': '.', ',': '.', ':': '.', ')': '.', ' ': '.', '\xa0': '.',
         '(': None, '§': None}))

    # Splitting string into different levels at dots.
    return translateNums(normalized.split('.'))
# Translate list of strings to integer list; supports decimal numbers,
# roman numbers & letters. Each recognised entry becomes a
# (value, EnumerationType) pair; unrecognised entries are silently dropped.
def translateNums(nums):
    translated = []
    for token in nums:
        if re.search("[IVXLivxl]{1,7}", token):
            # Roman digits win over the generic letter case below.
            translated.append((RomanNumber(token).getValue(), EnumerationType.Roman))
        elif re.search("[0-9]{1,2}", token):
            translated.append((int(token), EnumerationType.Numeric))
        elif re.search("[a-zA-Z]", token):
            # 'a'/'A' -> 1, 'b'/'B' -> 2, ...
            translated.append((ord(token.lower()) - 96, EnumerationType.Alphabetic))
    return translated
# Generate a list of TextFractions by traversing the tree (depth-first) and safe every content
# string to the TF list.
# Attributes: mainContent = root node of main content
def getTFList(mainContent):
    # The leading and trailing split markers guarantee that formListFromTF
    # always sees well-delimited paragraphs, even for a document that
    # produces only a single paragraph.
    fractions = [Type.split]
    getTFListRec(mainContent, fractions)
    fractions.append(Type.split)
    return fractions
# Attributes: node = node to be processed; toFill = list of TextFractions
def getTFListRec(node, toFill):
    # Tag comparison appends '$' on both sides so e.g. 'b' cannot
    # accidentally prefix-match 'blockquote' entries in the tag list.
    if (str(node.tag) + '$') in getParagraphFormingTags():
        # Paragraph-forming tag: isolate its content between split markers.
        toFill.append(Type.split)

        for child in node.children:
            if isinstance(child, TextElement):
                # Text leaves inherit the style and tag of the enclosing node.
                toFill.append(TextFraction(child.text, node.style, node.tag))
            elif isinstance(child, TreeElement):
                if (str(child.tag) + '$') == 'li$':
                    # List items are additionally wrapped in listStart/listEnd
                    # so that formBlockTF can assign them a List numeration.
                    toFill.append(Type.split)
                    toFill.append(Type.listStart)
                    getTFListRec(child, toFill)
                    toFill.append(Type.listEnd)
                    toFill.append(Type.split)
                else:
                    getTFListRec(child, toFill)

        toFill.append(Type.split)
    elif (str(node.tag) + '$') == 'br$':
        # Line breaks end the current paragraph without contributing text.
        toFill.append(Type.split)
    else:
        # Inline element: collect its text fractions but emit no split
        # markers, so its content stays inside the surrounding paragraph.
        for child in node.children:
            if isinstance(child, TextElement):
                toFill.append(TextFraction(child.text, node.style, node.tag))
            elif isinstance(child, TreeElement):
                getTFListRec(child, toFill)
# Return list of all tags + '$' rendered as a paragraph in HTML documents.
# These tags trigger a 'split' during the formation of a TF list.
# Fixes: 'blockquote' was missing its '$' suffix (so <blockquote> never
# triggered a split, since all lookups append '$'), and 'dt$' was duplicated.
def getParagraphFormingTags():
    return ['article$', 'section$', 'nav$', 'aside$', 'h1$', 'h2$', 'h3$', 'h4$', 'h5$', 'h6$', 'hgroup$', 'header$',
            'footer$', 'address$', 'p$', 'pre$', 'blockquote$', 'ol$', 'ul$', 'menu$', 'li$', 'dl$', 'dt$', 'dd$',
            'figure$', 'figcaption$', 'main$', 'div$', 'summary$', 'td$', 'th$', 'caption$', 'legend$', 'form$',
            'fieldset$', 'details$']
# Attributes: rootNode = rootNode of the visually and hierarchically separated hierarchy tree
def adjustListNums(rootNode):
    # Only BlockNodes have children to regroup; plain Blocks are handled by
    # their parent node.
    if isinstance(rootNode, BlockNode):
        # sepList collects child indexes where a new group starts; the leading
        # 0 is a sentinel covering content before the first list item.
        sepList = []
        sepList.append(0)
        for i in range(0, len(rootNode.children)):

            if isinstance(rootNode.children[i], BlockNode):
                # Recurse first: nested nodes are adjusted independently.
                adjustListNums(rootNode.children[i])
            else:
                if EnumerationType.List in rootNode.children[i].getNumerationPattern():
                    # Child carries a List numeration (set by formBlockTF).
                    sepList.append(i)
                elif i - 1 == sepList[len(sepList) - 1] and sepList[len(sepList) - 1] != 0:
                    # Child immediately follows the last recorded list item;
                    # presumably this marks the end of the list run —
                    # TODO(review) confirm intent.
                    sepList.append(i)

        if len(sepList) > 1:
            # At least one list item was found: regroup children so each
            # separator index starts a new headline-less BlockNode.
            sepList.append(sepList[len(sepList)-1]+1)
            sepList.append(len(rootNode.children))
            newChildren = []
            for i in range(0, len(sepList) - 1):
                toAppend = BlockNode()
                # Empty headline: list groups have no own title.
                toAppend.headline = Block('', None)
                toAppend.children = rootNode.children[sepList[i]:sepList[i+1]]
                newChildren.append(toAppend)

            rootNode.children = newChildren
# Check the textual content of a block for occuring enumeration patterns (REC).
# Attributes: blockList = block list of textual content to be checked
def separateBlocksNumsRec(blockList):
    # Every distinct numeration pattern present in blockList is a candidate
    # headline style; the first pattern that validates wins.
    headlineStyles = getAllStyles(blockList)
    for style in headlineStyles:
        # isValidNumStyleRes = (pattern is usable, blocks carry no headline).
        isValidNumStyleRes = isValidNumStyle(style, blockList)
        if isValidNumStyleRes[0]:
            # Indexes of all blocks carrying this numeration pattern.
            headlineList = []
            for i in range(0, len(blockList)):
                if isinstance(blockList[i], Block) and blockList[i].getNumerationPattern() == style[0]:
                    headlineList.append(i)
            if isValidNumerationPattern(blockList, headlineList):
                # Sentinel index so the final slice reaches the end of the list.
                headlineList.append(len(blockList))

                # Blocks before the first headline stay at the current level.
                newChildren = blockList[0:headlineList[0]]
                for i in range(0, len(headlineList) - 1):
                    toAppend = BlockNode()
                    if isValidNumStyleRes[1]:
                        # no headline
                        toAppend.headline = Block('', None)
                        toAppend.children = blockList[(headlineList[i]):headlineList[i+1]]
                    else:
                        # with headline
                        toAppend.headline = blockList[headlineList[i]]
                        toAppend.children = separateBlocksNumsRec(blockList[(headlineList[i] + 1):headlineList[i+1]])
                    newChildren.append(toAppend)
                return newChildren
    # No usable pattern found: keep the flat list unchanged.
    return blockList
# Attributes: style = numeration pattern as (pattern, block index); nodeChildren = list of blocks
def isValidNumStyle(style, nodeChildren):
    # Returns a tuple (patternIsValid, blocksHaveNoHeadline).
    # List enumerations are handled separately by adjustListNums.
    if EnumerationType.List in style[0]:
        return (False, False)
    # The pattern must occur on at least two Blocks to form a hierarchy level.
    if len(list(filter(lambda block: block.getNumerationPattern() == style[0],\
        list(filter(lambda child: isinstance(child, Block), nodeChildren))))) <= 1:
        return (False, False)

    # A long block (>= 10 words) is content rather than a headline, so the
    # groups formed from this pattern get no headline of their own.
    if len(nodeChildren[style[1]].text.split()) >= 10:
        return (True, True)
    elif style[1] == len(list(filter(lambda child: isinstance(child, Block), nodeChildren))) - 1:
        # Last Block: compare against the predecessor instead of a successor.
        # NOTE(review): style[1] indexes nodeChildren directly — this assumes
        # all Blocks precede any BlockNodes in the list; TODO confirm.
        return (nodeChildren[style[1]].getNumerationPattern() != nodeChildren[style[1] - 1].getNumerationPattern(), False)
    else:
        # A genuine headline's pattern must differ from its neighbour's.
        return (nodeChildren[style[1]].getNumerationPattern() != nodeChildren[style[1] + 1].getNumerationPattern(), False)
# Check if a step within a numeration pattern is valid.
# Attributes: num1 = first numeration pattern; num2 = second numeration pattern
def isValidStep(num1, num2):
    # A successor headline may advance the combined numeral total by exactly
    # one or two (e.g. 1.2 -> 1.3 or 1.2 -> 2.1).
    previousTotal = sum(value for value, _ in num1)
    currentTotal = sum(value for value, _ in num2)
    return 1 <= currentTotal - previousTotal <= 2
# Façade for different hierarchy extraction approaches.
# Attributes: mainContentDOMNode = node holding the main content; contentExtractor = content extractor type
def extractHierarchy(mainContentDOMNode, contentExtractor):
    # Local alias keeps the membership tests below readable.
    CE = src.ContentExtractor.ContentExtractorTypes.ContentExtractor
    defaultStyle = None

    # MCS is needed to determine non-headline style.
    # BUG FIX: the original conditions read `contentExtractor is A or B`,
    # which parses as `(contentExtractor is A) or B`; since B is a truthy
    # enum member the first branch always ran and the elif was dead code.
    if contentExtractor in (CE.NaiveStyleAndShortTextExclusion, CE.RenderedStyleAndShortTextExclusion):
        dic = getFrequencyOfStyles(mainContentDOMNode, CE.RenderedStyleAndShortTextExclusion)
        defaultStyle = max(dic, key=dic.get)
    elif contentExtractor in (CE.RenderedStyle, CE.NaiveStyle):
        dic = getFrequencyOfStyles(mainContentDOMNode, CE.RenderedStyle)
        defaultStyle = max(dic, key=dic.get)

    # Visual separation first, then refinement via enumeration patterns
    # (extractHierarchyNumerically mutates the tree in place).
    hierarchyTree = extractHierarchyVisually(mainContentDOMNode, defaultStyle)
    extractHierarchyNumerically(hierarchyTree)
    return hierarchyTree
+ + + # Get integer decimal value of a roman number. + def getValue(self): + if not self.valid: + return -1 + else: + reversed = self.num[::-1] + indexList = ['I', 'V', 'X', 'L'] + lastIndex = 0 + sum = 0 + for c in reversed: + if indexList.index(c) < lastIndex: + sum -= self.resolve[c] + else: + lastIndex = indexList.index(c) + sum += self.resolve[c] + return sum + + + def isValid(self): + return self.valid + diff --git a/src/HierarchyExtractor/VisualStyleHierarchyExtractor.py b/src/HierarchyExtractor/VisualStyleHierarchyExtractor.py new file mode 100644 index 0000000..790c501 --- /dev/null +++ b/src/HierarchyExtractor/VisualStyleHierarchyExtractor.py @@ -0,0 +1,93 @@ +from src.DOMParser.Font import Font +from src.HierarchyExtractor.BlockList import BlockList +from src.HierarchyExtractor.BlockNode import BlockNode + + +# Visual-based hierarchy extraction. +# Attributes: mainContent = main content dom node; defaultStyle = MCS not regarded as a possible headline +def extractHierarchyVisually(mainContent, defaultStyle): + blockList = BlockList(mainContent) + #printBlockList(blockList) + hierarchyTree = separateBlocks(blockList, defaultStyle) + return hierarchyTree + + + +# Print a list of blocks and its associated information (DEBUGGING). +# Attributes: blockList = list of blocks +def printBlockList(blockList): + for entry in blockList.list: + print(str(entry.style) + '\t' + str(entry.numeration) + '\t' + entry.text) + + + +# Separate blocks using a visual-based hierarchy extraction. +# Attributes: blockList = list of blocks; defaultStyle = MCS +def separateBlocks(blockList, defaultStyle): + toReturn = BlockNode() + toReturn.children = separateBlocksRec(blockList.list, defaultStyle) + return toReturn + + + +# Separate blocks using a visual-based hierarchy extraction (REC). 
# Determines whether a style is more prominent than the MCS.
# Attributes: style = currently investigated style; defaultStyle = MCS
def isMoreProminent(style, defaultStyle):
    # Underlining or a heavier weight wins outright; otherwise a larger
    # font size decides.
    if style.isUnderlined:
        return True
    if style.weight > defaultStyle.weight:
        return True
    return style.fontSize > defaultStyle.fontSize
# Attributes: list = block list; defaultStyle = MCS
# NOTE(review): parameter name `list` shadows the Python builtin; renaming
# would alter the public signature, so it is only flagged here.
def findNextHeadlineStyle(list, defaultStyle):
    # Scan in document order and return the first candidate headline style:
    # different from the main content style, not the placeholder style,
    # short (<= 10 words) and visually more prominent than the MCS.
    # Font(1, 300, False, 'undefined') is presumably the "unknown style"
    # sentinel produced by the parser — TODO confirm.
    for elem in list:
        if (elem.style != defaultStyle and elem.style != Font(1, 300, False, 'undefined')) \
                and (len(elem.text.split())) <= 10 and isMoreProminent(elem.style, defaultStyle):
            return elem.style
    # No headline style found at this level.
    return None
# Extract a terms-&-conditions page into the JSON target structure.
# Pipeline: Downloader -> Content Extractor -> (optional) style extraction ->
# Hierarchy Extractor -> Target Structure generation.
# Attributes: url = page to process; contentExtractor = extraction strategy;
# threshold = minimum main-content coverage (must be > 0.5);
# driver = optional Selenium driver reused across calls (may be None).
def extractTandC(url, contentExtractor=src.ContentExtractor.ContentExtractorTypes.ContentExtractor.NaiveStyleAndShortTextExclusion,
                 threshold=0.85, driver=None):

    # Check for legality of threshold.
    if not threshold > 0.5:
        print('Threshold must be above 0.5!')
        return None

    # Use Downloader component:
    # Rendered-style extractors need the styles resolved during download.
    extractStyle = False
    if contentExtractor is src.ContentExtractor.ContentExtractorTypes.ContentExtractor.RenderedStyle \
            or contentExtractor is src.ContentExtractor.ContentExtractorTypes.ContentExtractor.RenderedStyleAndShortTextExclusion:
        extractStyle = True
    # getDOMTree returns (bodyNode, title).
    website = getDOMTree(url, extractStyle, driver)
    title = website[1]
    bodyNode = website[0]


    # Use Content Extractor component:
    mainContent = getMainContent(bodyNode, contentExtractor, threshold)


    # Add styling if this did not happen before.
    # extractStyleForSubtree mutates mainContent in place.
    if not extractStyle:
        extractStyleForSubtree(url, mainContent, driver)

    hierarchyTree = extractHierarchy(mainContent, contentExtractor)

    # Serialize the hierarchy tree into the JSON target structure.
    toReturn = generateTargetStructure(hierarchyTree, url, title)
    return toReturn
# Extract multiple T&Cs with the same driver and return result list.
# Attributes: links = list of links as string; contentExtractor = content extraction method; threshold = minimum
# coverage for main content node; driver = Selenium driver
def extractTandD_multiple(links, contentExtractor=src.ContentExtractor.ContentExtractorTypes.ContentExtractor.NaiveStyleAndShortTextExclusion,
                          threshold=0.85, driver=None):
    close = False
    if driver is None:
        close = True
        driver = webdriver.Chrome(executable_path='../chromedriver')
    try:
        # Reuse one driver across all links to avoid per-page browser startup.
        return [extractTandC(link, contentExtractor, threshold, driver)
                for link in links]
    finally:
        # FIX: previously an exception inside the loop leaked the browser
        # process. Only close a driver we created ourselves; a caller-supplied
        # driver stays open for further use.
        if close:
            driver.close()
# Parse hierarchy tree into sections.
# Attributes: tree = node of hierarchy tree
def parseToSections(tree):
    # Collect the node's own text fragments and recurse into sub-nodes in a
    # single pass over the children; relative order is preserved for both.
    fragments = []
    subsections = []
    for child in tree.children:
        if isinstance(child, Block):
            fragments.append(child.text)
        elif isinstance(child, BlockNode):
            subsections.append(parseToSections(child))

    # Nodes without a headline (e.g. synthetic group nodes) get an empty title.
    title = tree.headline.text if tree.headline is not None else ''
    return Section(title, '\n'.join(fragments), subsections)