diff --git a/.gitignore b/.gitignore
index 894a44c..fb9b363 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,3 +102,8 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+
+.idea
+text_payslips/*.txt
+/payslips-month-columns.csv
+/payslips-month-rows.csv
diff --git a/Pipfile b/Pipfile
new file mode 100644
index 0000000..6b285e1
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,13 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+"pdfminer.six" = "*"
+chardet = "==3.0.4"
+
+[dev-packages]
+
+[requires]
+python_version = "3.7"
diff --git a/Pipfile.lock b/Pipfile.lock
new file mode 100644
index 0000000..6cce92c
--- /dev/null
+++ b/Pipfile.lock
@@ -0,0 +1,83 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "8e6b425c47b2dc898efa6fbf512b89add52b62ac6a844422d6e3cadd31a79fff"
+        },
+        "pipfile-spec": 6,
+        "requires": {
+            "python_version": "3.7"
+        },
+        "sources": [
+            {
+                "name": "pypi",
+                "url": "https://pypi.org/simple",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "chardet": {
+            "hashes": [
+                "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
+                "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
+            ],
+            "index": "pypi",
+            "version": "==3.0.4"
+        },
+        "pdfminer.six": {
+            "hashes": [
+                "sha256:f04d029d1d3e58c87da51bdefef2e9a1dbf2d7b63f727dd2a3e36054f5ae96ea"
+            ],
+            "index": "pypi",
+            "version": "==20181108"
+        },
+        "pycryptodome": {
+            "hashes": [
+                "sha256:0281dc6a65a4d0d9e439f54e0ad5faf27bfdc2ebe9ead36912bac74a0920fa2e",
+                "sha256:02af9b284f5c9a55f06f5e4532c16c9b7bd958e293e93969934d864ef7bd87ee",
+                "sha256:09da99372fb69762e4b9690291176a166cc351793e2e1c9405d29ca291503aa8",
+                "sha256:0c2400ccfc049c3f24e65d4f02bb4208d86e408011019e455fab7f50d2b226c9",
+                "sha256:2081dd6dce6b21bf3596427edaedd4f2561dce616893b162ed2c674f3a3ca70a",
+                "sha256:28b86ec9fdb005a2a18e4862a3a7277046738825ee8dc89cda5657e75a396089",
+                "sha256:2d790c0d4c0d5edcf5fbab4e2af7b03757e40c5ae8d217f0dfe9ddea37fe130f",
+                "sha256:2f24906153dca16528cf5515b1afa9ef635423d5a654904e861765f88ca667b6",
+                "sha256:30d283939896fa4bacbdb9fa86e6fd51e9a5b953a511e210b38481f697f289f5",
+                "sha256:31f78b67f97830d137f74813c0502a181a03b43a32ed124049bb20428176c307",
+                "sha256:33c1f3a380fd38ab4dd4372bef17e98002b360b52814bb1b077693b1bd06ec87",
+                "sha256:34091e9a6650c44e25339f22fc821396f19f152f65be2546edd823a093fb5a04",
+                "sha256:567fb73951ab6865a2eb1a0060b54be1e27302574f6c65879525bdf53fab49e1",
+                "sha256:5bc40f8aa7ba8ca7f833ad2477b9d84e1bfd2630b22a46d9bbd221982f8c3ac0",
+                "sha256:6b0a0ccf33c7a6100c569667c888335a4aaf0d22218cb97b4963a65d70f6c343",
+                "sha256:71b93157f1ce93fc7cfff9359b76def2b4826a7ef7a7f95e070161368e7f584a",
+                "sha256:7d939d511b7dac29b2d936706786771ecb8256e43fade5cdb0e8bc58f02b86cf",
+                "sha256:7fbc5a93d52e4c51487f4648b00dc41700adb144d10fc567b05f852e76c243ad",
+                "sha256:9cb94b8f9c915a5d2b273d612a25a8e5d67b49543f8eb6bcec0275ac46cda421",
+                "sha256:a585ea1722f9731e75881d5ffcc51d11c794d244ac57e7c2a9cbb8d5ac729302",
+                "sha256:a6458dd7a10ae51f6fce56bdfc79bf6d3b54556237045d09e77fbda9d6d37864",
+                "sha256:a9fb92e948128bce0239b87c6efcf2cb1c5a703d0b41dd6835211e6fafd1c5df",
+                "sha256:b0b6b4ca1c53e7d6ca9f2720919f63837f05e7a5f92912a2bc29bfd03ed3b54f",
+                "sha256:b7d22c8d648aaa3a7ec785eda544402141eb78ac5ffbba4cbe2c3a1f52276870",
+                "sha256:bc9560574a868cfa2ba781b7bb0b4685b08ea251697abfc49070ffc05e1cbee6",
+                "sha256:c0c5a576f3f7b7de3f86889cb47eb51b59dc11db9cf1e2a0f51eb4d988010ea4",
+                "sha256:e1c91c2fa942a71c98a7a1f462de6dbbe82f34b9267eb8131314d97bd13bf0d4",
"sha256:ec936361ad78aa95382c313df95777795b8185aac5dd3ec5463363ea94b556fc" + ], + "version": "==3.8.2" + }, + "six": { + "hashes": [ + "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", + "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" + ], + "version": "==1.12.0" + }, + "sortedcontainers": { + "hashes": [ + "sha256:974e9a32f56b17c1bac2aebd9dcf197f3eb9cd30553c5852a3187ad162e1a03a", + "sha256:d9e96492dd51fae31e60837736b38fe42a187b5404c16606ff7ee7cd582d4c60" + ], + "version": "==2.1.0" + } + }, + "develop": {} +} diff --git a/README.md b/README.md index 6db7134..8a70c55 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,74 @@ # ms-uk-payslip-parser -Parser for payslips + +Simple parser for payslips issued by MS UK. + +Converts a series of your PDF payslips into a neat CSV table. + +## Installation + +- Install Python3 3.7+ and Virtualenv +- Install dependencies +``` +# create a virtualenv +mkvirtualenv payslip-parser +# switch to virtualenv +workon payslip-parser +# install dependencies +pip3 install -r requirements.txt +``` +- Or if you have `pipenv` installed: +```bash +pipenv install +``` + +## Usage + +1. Download your payslips PDF files from the portal and put them in a directory + e.g. `~/payslips` + +2. Get into your virtualenv: + + ```bash + workon payslip-parser + ``` + + or if you have `pipenv` + + ```bash + pipenv shell + ``` + +3. First, convert PDF files to text: + + ```bash + python3 to_text.py ~/payslips ./text_payslips + ``` + + Now you should see text files with your payslips content in `text_payslips` directory. + +4. Now you can parse the text files and produce CSV tables: + + ```bash + python3 parser.py ./text_payslips + ``` + + After this you will see two CSV files in this directory: + - `payslips-month-columns.csv` - each month's data is in a separate column + - `payslips-month-rows.csv` - each month's data is in a separate row + + Every payslip item label has a short prefix identifying its payslip section: + - `.m` - metadata item + - `.d.p` - payments data item + - `.d.d` - deductions data item + - `.d.t` - totals data item + - `.d.et` - employer totals data item + - `.d.ytd` - year-to-date data item + +5. Open the CSV file in your spreadsheet editor of choice or Pandas. + + +## Feedback + +Create an issue if you encounter a problem or have a suggestions. +Or ping me on Teams. 
+
diff --git a/parser.py b/parser.py
new file mode 100644
index 0000000..1c422bf
--- /dev/null
+++ b/parser.py
@@ -0,0 +1,225 @@
+import collections
+import csv
+import re
+import sys
+from collections import OrderedDict
+from itertools import filterfalse, tee
+from pathlib import Path
+
+HEADER_FIELD = '.m.Pay Date'  # serves as the unique key for each payslip
+
+FIELDS_ORDER = [
+    HEADER_FIELD, '.m.Pay', '.m.',
+    '.d.p',
+    '.d.d',
+    '.d.t',
+    '.d.et',
+    '.d.ytd',
+]
+
+UNWANTED_FIELDS = [
+    '.m.Company Name', '.m.Account', '.m.Sort Code', '.m.NI Number', '.m.NI Category', '.m.Pay Method',
+]
+
+
+def parse_amount(amount: str):
+    amount = amount.replace(',', '')
+    if amount.endswith('-'):  # a trailing '-' marks a negative amount
+        return -float(amount[:-1])
+    else:
+        return float(amount)
+
+
+def parse_metadata(metadata_text: str):
+    metadata = {}
+    for row in metadata_text.splitlines():
+        if not row:
+            continue
+        _, cell1, cell2, cell3, _ = row.split('|')
+        for cell in [cell1, cell2, cell3]:
+            cell = cell.strip()
+            if cell:
+                separator_regex = r':\s+' if ':' in cell else r'\s\s+'
+                item, value = re.compile(separator_regex).split(cell, maxsplit=1)
+                metadata[item.strip()] = value.strip()
+
+    return metadata
+
+
+def parse_payments_table(payments_table: str):
+    payments = {}
+    deductions = {}
+    ytd_balances = {}
+    for row in payments_table.splitlines():
+        row = row.strip()
+        if not row:
+            continue
+        _, payment, deduction, ytd_balance, _ = row.split('|')
+
+        payment = payment.strip()
+        if payment:
+            payment_item, amount = re.compile(r'\s\s+').split(payment)
+            payments[payment_item] = parse_amount(amount)
+
+        deduction = deduction.strip()
+        if deduction:
+            deduction_item, amount = re.compile(r'\s\s+').split(deduction)
+            deductions[deduction_item] = parse_amount(amount)
+
+        ytd_balance = ytd_balance.strip()
+        if ytd_balance:
+            ytd_balance_item, amount = re.compile(r'\s\s+').split(ytd_balance)
+            ytd_balances[ytd_balance_item] = parse_amount(amount)
+
+    return payments, deductions, ytd_balances
+
+
+def parse_totals(totals_row: str):
+    totals = {}
+    _, payment_total, deduction_total, net_pay, _ = totals_row.split('|')
+    for total_value in [payment_total, deduction_total, net_pay]:
+        item, amount = re.compile(r':\s+').split(total_value.strip())
+        totals[item] = parse_amount(amount)
+    return totals
+
+
+def parse_employer_totals(employer_total_footer):
+    totals = {}
+    for row in employer_total_footer.strip().splitlines()[1:]:
+        row = row.strip()
+        if not row or row.count('|') != 4:
+            continue
+
+        _, this_employer_cell, _ = row.split('|', maxsplit=2)
+        item, amount = re.compile(r'\s\s+').split(this_employer_cell.strip())
+        totals[item] = parse_amount(amount)
+    return totals
+
+
+def parse_payslip(payslip_text: str):
+    address, metadata, payment_data = re.compile(r"^\s+?-+$", re.MULTILINE).split(payslip_text)
+
+    _, payment_headers, payments_table, totals_row, _, employer_total_footer = \
+        re.compile(r"^\s+?-+\|$", re.MULTILINE).split(payment_data)
+
+    metadata = parse_metadata(metadata)
+    payments, deductions, ytd_balances = parse_payments_table(payments_table)
+    totals = parse_totals(totals_row)
+    employer_totals = parse_employer_totals(employer_total_footer)
+
+    data = {
+        'p': payments,
+        'd': deductions,
+        'ytd': ytd_balances,
+        't': totals,
+        'et': employer_totals
+    }
+    return {
+        # 'address': address,
+        'm': metadata,
+        'd': data
+    }
+
+
+def print_payslip(dd, indent=""):
+    for k, v in dd.items():
+        if not hasattr(v, 'items'):
+            print(f"{k}:\n{v}")
+            # print(['*'] * 30)
+        else:
+            print(f"{k}:\n")
+            print_payslip(v, indent=indent + "  ")
+
+
+def count_fields(counts, nested_dict, prefix=''):
+    if hasattr(nested_dict, 'items'):
+        for k, v in nested_dict.items():
+            count_fields(counts, v, prefix=prefix + '.' + k)
+    else:
+        counts[prefix] += 1
+
+
+def flatten(nested_dict, flat_dict, prefix=''):
+    if hasattr(nested_dict, 'items'):
+        for k, v in nested_dict.items():
+            flatten(v, flat_dict, prefix=prefix + '.' + k)
+    else:
+        flat_dict[prefix] = nested_dict
+
+
+def write_payslip_csv_month_rows(categories, csv_table):
+    with open('payslips-month-rows.csv', 'w', newline='', encoding='utf-8') as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=categories)
+        writer.writeheader()
+        for row in csv_table:
+            writer.writerow(row)
+
+
+def write_payslip_csv_month_columns(columns, csv_table):
+    with open('payslips-month-columns.csv', 'w', newline='', encoding='utf-8') as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=columns)
+        # no writer.writeheader() here: the '.m.Pay Date' category row doubles as the header
+        for row in csv_table:
+            writer.writerow(row)
+
+
+def partition(pred, iterable):
+    'Use a predicate to partition entries into false entries and true entries'
+    # partition(is_odd, range(10)) --> 0 2 4 6 8  and  1 3 5 7 9
+    t1, t2 = tee(iterable)
+    return filterfalse(pred, t1), filter(pred, t2)
+
+
+def enforce_order(iterable, prefixes: list):
+    remainder = iterable
+    result = []
+    for prefix in prefixes:
+        remainder, matching = partition(lambda x: x.startswith(prefix), remainder)
+        remainder = list(remainder)
+        result += sorted(matching)
+    result += sorted(remainder)  # fields matching no prefix go last
+    return result
+
+
+if __name__ == '__main__':
+    payslips_dir = Path(sys.argv[1])
+    counts = collections.Counter()
+    csv_rows_table = []
+    for payslip_file in sorted(payslips_dir.glob('*.txt')):
+        # if payslip_file.name < '2018-04-' or payslip_file.name > '2019-04-':
+        #     continue
+        payslip_text = payslip_file.read_text(encoding='utf-8')
+        if 'Employee Number' not in payslip_text:
+            print(f"Skipping {payslip_file} ...")
+            continue
+        print(f"Parsing {payslip_file} ...")
+        payslip = parse_payslip(payslip_text)
+
+        count_fields(counts, payslip)
+        flat_payslip = {}
+        flatten(payslip, flat_payslip)
+        csv_rows_table.append(flat_payslip)
+
+    categories = counts.keys()
+    categories = enforce_order(categories, FIELDS_ORDER)
+
+    # pprint('\n'.join(categories))
+    # print(len(categories))
+    write_payslip_csv_month_rows(categories, csv_rows_table)
+
+    for unwanted_field in UNWANTED_FIELDS:
+        if unwanted_field in categories:
+            categories.remove(unwanted_field)
+
+    csv_cols_table = []
+    columns = [HEADER_FIELD, *[payslip[HEADER_FIELD] for payslip in csv_rows_table]]
+    for category in categories:
+        category_row = OrderedDict()
+        category_row[HEADER_FIELD] = category
+        for payslip in csv_rows_table:
+            month = payslip[HEADER_FIELD]
+            category_row[month] = payslip.get(category)
+        csv_cols_table.append(category_row)
+
+    write_payslip_csv_month_columns(columns, csv_cols_table)
+    print("Done.")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f63c058
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+-i https://pypi.org/simple
+chardet==3.0.4
+pdfminer.six==20181108
+pycryptodome==3.8.2
+six==1.12.0
+sortedcontainers==2.1.0
diff --git a/text_payslips/.keep b/text_payslips/.keep
new file mode 100644
index 0000000..e69de29
diff --git a/to_text.py b/to_text.py
new file mode 100644
index 0000000..7ec3ea1
--- /dev/null
+++ b/to_text.py
@@ -0,0 +1,49 @@
+import sys
+from pathlib import Path
+
+import pdfminer.high_level
+import pdfminer.layout
+import pdfminer.settings
+
+pdfminer.settings.STRICT = False
+
+
+def extract_text(pdf_file, outfile,
+                 no_laparams=False, all_texts=None, detect_vertical=None,  # LAParams
+                 word_margin=None, char_margin=None, line_margin=None, boxes_flow=None,  # LAParams
+                 output_type='text', codec='utf-8', strip_control=False,
+                 maxpages=0, page_numbers=None, password="", scale=1.0, rotation=0,
+                 layoutmode='normal', output_dir=None, debug=False,
+                 disable_caching=False, **other):
+    if not pdf_file:
+        raise ValueError("Must provide file to work upon!")
+
+    # If any LAParams group arguments were passed, create an LAParams object and
+    # populate with given args. Otherwise, set it to None.
+    if not no_laparams:
+        laparams = pdfminer.layout.LAParams()
+        for param in ("all_texts", "detect_vertical", "word_margin", "char_margin", "line_margin", "boxes_flow"):
+            paramv = locals().get(param, None)
+            if paramv is not None:
+                setattr(laparams, param, paramv)
+    else:
+        laparams = None
+
+    with open(outfile, "wb") as outfp:
+        with open(pdf_file, "rb") as fp:
+            pdfminer.high_level.extract_text_to_fp(fp, **locals())  # locals() forwards outfp, laparams, codec, etc.
+
+
+if __name__ == '__main__':
+    print(sys.argv)
+    source_dir = Path(sys.argv[1]).resolve()
+    dest_dir = Path(sys.argv[2]).resolve()
+    for pdf_file in sorted(source_dir.glob('*.pdf')):
+        txt_file = dest_dir.joinpath(pdf_file.name).with_suffix('.txt')
+        print(pdf_file)
+        print(txt_file)
+        if txt_file.exists():
+            print("Already exists. Skipping...")
+        else:
+            print("Extracting...")
+            extract_text(pdf_file=pdf_file, outfile=txt_file)