Browse Source

Initial commit

Nikola Kotur 5 years ago
commit
bdc77b0af1
10 changed files with 564 additions and 0 deletions
  1. 101 0
      .gitignore
  2. 12 0
      Dockerfile
  3. 21 0
      LICENSE
  4. 19 0
      README.md
  5. 20 0
      certstream/__init__.py
  6. 134 0
      certstream/certlib.py
  7. 192 0
      certstream/watcher.py
  8. 55 0
      certstream/webserver.py
  9. 7 0
      requirements.txt
  10. 3 0
      run_server.py

+ 101 - 0
.gitignore

@@ -0,0 +1,101 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+.venv/
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+
+.idea/
+
+*.sqlite3
+phishfinder/secrets.py
+
+celerybeat-schedule
+.DS_Store
+
+*.retry

+ 12 - 0
Dockerfile

@@ -0,0 +1,12 @@
# Container image for the certstream watcher; the web server listens on 8080.
FROM python:3

WORKDIR /usr/src/app

# Install dependencies first so this layer is cached across source-only changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source last.
COPY . .

EXPOSE 8080

CMD [ "python", "./run_server.py" ]

+ 21 - 0
LICENSE

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 Cali Dog Security
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 19 - 0
README.md

@@ -0,0 +1,19 @@
+# Certstream-watcher
+
+This is a Platform.sh project, written to aggregate, parse, and watch certificate data from many [certificate transparency logs](https://www.certificate-transparency.org/what-is-ct), and to send a notification when a certain pattern is noticed. Based on code from [CaliDog](https://github.com/CaliDog/certstream-server-python).
+
+## Setup
+
+* Environment variable defining the domain-name endings to watch
+* Slack integration hook
+
+## Development
+
+```
+virtualenv -p python3 .env
+source .env/bin/activate
+
+pip install -r requirements.txt
+
+python run_server.py
+```

+ 20 - 0
certstream/__init__.py

@@ -0,0 +1,20 @@
+import logging
+import asyncio
+
+import uvloop
+asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
+
+from certstream.watcher import TransparencyWatcher, mux_ctl_stream
+from certstream.webserver import WebServer
+
+logging.basicConfig(format='[%(levelname)s:%(name)s] %(asctime)s - %(message)s', level=logging.INFO)
+
def run():
    """Entry point: wire up the CT-log watcher, the stream consumer, and the
    web server, then block serving HTTP (web.run_app drives the loop)."""
    logging.info("Starting CertStream...")

    event_loop = asyncio.get_event_loop()
    watcher = TransparencyWatcher(event_loop)

    # Schedule the per-log polling tasks and the queue consumer before the
    # web server takes over the event loop.
    asyncio.ensure_future(asyncio.gather(*watcher.get_tasks()))
    asyncio.ensure_future(mux_ctl_stream(watcher))

    WebServer(event_loop, watcher).run_server()

+ 134 - 0
certstream/certlib.py

@@ -0,0 +1,134 @@
+import base64
+import datetime
+import logging
+import time
+
+from collections import OrderedDict
+
+from OpenSSL import crypto
+from construct import Struct, Byte, Int16ub, Int64ub, Enum, Bytes, \
+    Int24ub, this, GreedyBytes, GreedyRange, Terminated, Embedded
+
+
# Binary layouts (construct library) for RFC 6962 CT log structures.

# MerkleTreeLeaf header: version, leaf type, timestamp, entry type, and the
# remaining raw entry bytes.
MerkleTreeHeader = Struct(
    "Version"         / Byte,
    "MerkleLeafType"  / Byte,
    "Timestamp"       / Int64ub,
    "LogEntryType"    / Enum(Int16ub, X509LogEntryType=0, PrecertLogEntryType=1),
    "Entry"           / GreedyBytes
)

# A single DER certificate prefixed by a 24-bit big-endian length.
Certificate = Struct(
    "Length" / Int24ub,
    "CertData" / Bytes(this.Length)
)

# A length-prefixed run of Certificates (the issuance chain).
CertificateChain = Struct(
    "ChainLength" / Int24ub,
    "Chain" / GreedyRange(Certificate),
)

# extra_data layout for precert entries: the leaf certificate followed by
# its chain, inlined (Embedded) at this level, with nothing trailing.
PreCertEntry = Struct(
    "LeafCert" / Certificate,
    Embedded(CertificateChain),
    Terminated
)
+
def dump_extensions(certificate):
    """Return a dict mapping X.509 extension short-names to string values.

    Extensions OpenSSL does not recognize (short name ``b'UNDEF'``) are
    skipped. If stringifying an extension raises, its value is recorded as
    "NULL" when the name was already retrieved; otherwise the failure is
    logged at debug level and the extension is dropped.
    """
    extensions = {}
    for index in range(certificate.get_extension_count()):
        extension_name = ""
        try:
            # Fetch the extension once (the original called get_extension
            # twice per iteration).
            extension = certificate.get_extension(index)
            extension_name = extension.get_short_name()

            # OpenSSL reports unrecognized extensions as b'UNDEF'; skip them.
            if extension_name == b'UNDEF':
                continue

            extensions[extension_name.decode('latin-1')] = str(extension)
        except Exception:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt/
            # SystemExit; narrow to Exception. Some extensions raise while
            # being stringified -- keep a placeholder when we got the name.
            try:
                extensions[extension_name.decode('latin-1')] = "NULL"
            except Exception as e:
                # Name lookup itself failed (extension_name is still the ""
                # str, which has no .decode) -- nothing useful to record.
                logging.debug("Extension parsing error -> {}".format(e))
    return extensions
+
def serialize_certificate(certificate):
    """Serialize a pyOpenSSL X509 certificate to a JSON-friendly dict.

    Returns the subject components, extensions, UTC validity window as epoch
    seconds, serial number (hex), SHA1 fingerprint, and the DER bytes
    base64-encoded.
    """
    subject = certificate.get_subject()

    # notBefore/notAfter are ASN.1 times in UTC (trailing "Z"). Attach the
    # UTC timezone before calling .timestamp(): a naive datetime is
    # interpreted in the server's *local* timezone, which skewed the epoch
    # values on any non-UTC host.
    not_before_datetime = datetime.datetime.strptime(
        certificate.get_notBefore().decode('ascii'), "%Y%m%d%H%M%SZ"
    ).replace(tzinfo=datetime.timezone.utc)
    not_after_datetime = datetime.datetime.strptime(
        certificate.get_notAfter().decode('ascii'), "%Y%m%d%H%M%SZ"
    ).replace(tzinfo=datetime.timezone.utc)

    return {
        "subject": {
            # repr() of an X509Name looks like "<X509Name object '/C=US/...'>";
            # the slice extracts the quoted "/C=.../CN=..." portion.
            "aggregated": repr(certificate.get_subject())[18:-2],
            "C": subject.C,
            "ST": subject.ST,
            "L": subject.L,
            "O": subject.O,
            "OU": subject.OU,
            "CN": subject.CN
        },
        "extensions": dump_extensions(certificate),
        "not_before": not_before_datetime.timestamp(),
        "not_after": not_after_datetime.timestamp(),
        "serial_number": '{0:x}'.format(int(certificate.get_serial_number())),
        "fingerprint": str(certificate.digest("sha1"), 'utf-8'),
        "as_der": base64.b64encode(
            crypto.dump_certificate(
                crypto.FILETYPE_ASN1, certificate
            )
        ).decode('utf-8')
    }
+
def add_all_domains(cert_data):
    """Collect every domain on the leaf certificate into
    cert_data['leaf_cert']['all_domains'].

    Combines the subject CN (when present -- some certs carry a null CN)
    with the DNS entries of subjectAltName, de-duplicated while preserving
    first-seen order. Mutates and returns cert_data.
    """
    leaf = cert_data['leaf_cert']
    domains = []

    common_name = leaf['subject']['CN']
    if common_name:
        domains.append(common_name)

    san = leaf['extensions'].get('subjectAltName')
    if san:
        domains.extend(
            item.replace('DNS:', '')
            for item in san.split(', ')
            if item.startswith('DNS:')
        )

    # OrderedDict.fromkeys de-duplicates while keeping insertion order.
    leaf['all_domains'] = list(OrderedDict.fromkeys(domains))

    return cert_data
+
def parse_ctl_entry(entry, operator_information):
    """Parse one raw CT log entry into the certstream dict format.

    ``entry`` is a dict with base64 'leaf_input' and 'extra_data' plus an
    'index' (absolute position in the log); ``operator_information`` supplies
    the source log's 'url' and 'description'.
    """
    mtl = MerkleTreeHeader.parse(base64.b64decode(entry['leaf_input']))

    cert_data = {}

    if mtl.LogEntryType == "X509LogEntryType":
        # X509 entry: the leaf holds the certificate itself; extra_data
        # carries the issuance chain.
        cert_data['update_type'] = "X509LogEntry"
        chain = [crypto.load_certificate(crypto.FILETYPE_ASN1, Certificate.parse(mtl.Entry).CertData)]
        extra_data = CertificateChain.parse(base64.b64decode(entry['extra_data']))
        for cert in extra_data.Chain:
            chain.append(crypto.load_certificate(crypto.FILETYPE_ASN1, cert.CertData))
    else:
        # Precert entry: both the pre-certificate and its chain live in
        # extra_data (the leaf holds only the TBS portion).
        cert_data['update_type'] = "PreCertEntry"
        extra_data = PreCertEntry.parse(base64.b64decode(entry['extra_data']))
        chain = [crypto.load_certificate(crypto.FILETYPE_ASN1, extra_data.LeafCert.CertData)]

        for cert in extra_data.Chain:
            chain.append(
                crypto.load_certificate(crypto.FILETYPE_ASN1, cert.CertData)
            )

    # chain[0] is always the leaf; the rest is the chain in log order.
    cert_data.update({
        "leaf_cert": serialize_certificate(chain[0]),
        "chain": [serialize_certificate(x) for x in chain[1:]],
        "cert_index": entry['index'],
        "seen": time.time()
    })

    # Derive leaf_cert['all_domains'] from the CN plus subjectAltName.
    add_all_domains(cert_data)

    cert_data['source'] = {
        "url": operator_information['url'],
        "name": operator_information['description']
    }

    return cert_data

+ 192 - 0
certstream/watcher.py

@@ -0,0 +1,192 @@
+import asyncio
+import logging
+import math
+import sys
+import os
+import datetime
+
+import aiohttp
+import requests
+
+from certstream.certlib import parse_ctl_entry
+
+
class TransparencyWatcher(object):
    """Polls every reachable certificate-transparency log for new entries.

    Each log gets its own polling coroutine (``watch_for_updates_task``);
    parsed certificates are pushed onto the bounded ``self.stream`` asyncio
    queue for consumers such as ``mux_ctl_stream``.
    """

    # These are a list of servers that we shouldn't even try to connect to. In testing they either had bad
    # DNS records, resolved to un-routable IP addresses, or didn't have valid SSL certificates.
    BAD_CT_SERVERS = [
        "alpha.ctlogs.org",
        "clicky.ct.letsencrypt.org",
        "ct.akamai.com",
        "ct.filippo.io/behindthesofa",
        "ct.gdca.com.cn",
        "ct.izenpe.com",
        "ct.izenpe.eus",
        "ct.sheca.com",
        "ct.startssl.com",
        "ct.wosign.com",
        "ctlog.api.venafi.com",
        "ctlog.gdca.com.cn",
        "ctlog.sheca.com",
        "ctlog.wosign.com",
        "ctlog2.wosign.com",
        "flimsy.ct.nordu.net:8080",
        "log.certly.io",
        "nessie2021.ct.digicert.com/log",
        "plausible.ct.nordu.net",
        "www.certificatetransparency.cn/ct",
        "ctserver.cnnic.cn",
    ]

    # Maximum number of entries requested per /ct/v1/get-entries call.
    MAX_BLOCK_SIZE = 64

    def __init__(self, _loop):
        """Initialize state only; network work starts when get_tasks() runs."""
        self.loop = _loop
        self.stopped = False
        self.logger = logging.getLogger('certstream.watcher')

        # Bounded queue of parsed certificates; producers await when full.
        self.stream = asyncio.Queue(maxsize=3000)
        # ISO timestamp of the most recently queued certificate (exposed by
        # the web server's stats endpoint).
        self.lastseen = None

        self.logger.info("Initializing the CTL watcher")

    def _initialize_ts_logs(self):
        """Fetch the master list of CT logs from Google; exit on any failure."""
        try:
            self.transparency_logs = requests.get('https://www.gstatic.com/ct/log_list/all_logs_list.json').json()
        except Exception as e:
            self.logger.fatal("Invalid response from certificate directory! Exiting :(")
            sys.exit(1)

        self.logger.info("Retrieved transparency log with {} entries to watch.".format(len(self.transparency_logs['logs'])))
        for entry in self.transparency_logs['logs']:
            # Strip trailing slashes: later code appends "/ct/v1/..." itself.
            if entry['url'].endswith('/'):
                entry['url'] = entry['url'][:-1]
            self.logger.info("  + {}".format(entry['description']))

    async def _print_memory_usage(self):
        """Debug-only loop: dump queue backlog and object growth every minute."""
        import objgraph
        import gc

        while True:
            print("Stream backlog : {}".format(self.stream.qsize()))
            gc.collect()
            objgraph.show_growth()
            await asyncio.sleep(60)

    def get_tasks(self):
        """Return the coroutines to schedule: one watcher per usable CT log,
        plus the memory probe when DEBUG_MEMORY is set."""
        self._initialize_ts_logs()

        coroutines = []

        if os.getenv("DEBUG_MEMORY", False):
            coroutines.append(self._print_memory_usage())

        for log in self.transparency_logs['logs']:
            if log['url'] not in self.BAD_CT_SERVERS:
                coroutines.append(self.watch_for_updates_task(log))
        return coroutines

    def stop(self):
        """Flag all pollers to stop and cancel every task on the loop."""
        self.logger.info('Got stop order, exiting...')
        self.stopped = True
        # NOTE(review): asyncio.Task.all_tasks() was removed in Python 3.9;
        # confirm the target interpreter version.
        for task in asyncio.Task.all_tasks():
            task.cancel()

    async def watch_for_updates_task(self, operator_information):
        """Poll one CT log forever: re-read its signed tree head every 30s and
        stream any new entries through parse_ctl_entry onto self.stream."""
        try:
            latest_size = 0
            name = operator_information['description']
            while not self.stopped:
                try:
                    async with aiohttp.ClientSession(loop=self.loop) as session:
                        async with session.get("https://{}/ct/v1/get-sth".format(operator_information['url'])) as response:
                            info = await response.json()
                except aiohttp.ClientError as e:
                    self.logger.info('[{}] Exception -> {}'.format(name, e))
                    # Back off for ten minutes after a network error.
                    await asyncio.sleep(600)
                    continue

                tree_size = info.get('tree_size')

                # TODO: Add in persistence and id tracking per log
                if latest_size == 0:
                    # First successful poll: start from the current head
                    # rather than replaying the log's entire history.
                    latest_size = tree_size

                if latest_size < tree_size:
                    self.logger.info('[{}] [{} -> {}] New certs found, updating!'.format(name, latest_size, tree_size))

                    try:
                        async for result_chunk in self.get_new_results(operator_information, latest_size, tree_size):
                            for entry in result_chunk:
                                cert_data = parse_ctl_entry(entry, operator_information)
                                # if cert_data['update_type'] == 'X509LogEntry':
                                #     print(cert_data['source']['url'], cert_data['leaf_cert']['subject']['CN'], cert_data['leaf_cert']['extensions']['subjectAltName'])
                                self.lastseen = datetime.datetime.now().isoformat()
                                await self.stream.put(cert_data)

                    except aiohttp.ClientError as e:
                        self.logger.info('[{}] Exception -> {}'.format(name, e))
                        await asyncio.sleep(600)
                        continue

                    except Exception as e:
                        # NOTE(review): any parse failure permanently ends this
                        # log's watcher coroutine (return, not continue).
                        print("Encountered an exception while getting new results! -> {}".format(e))
                        return

                    latest_size = tree_size
                else:
                    self.logger.debug('[{}][{}|{}] No update needed, continuing...'.format(name, latest_size, tree_size))

                await asyncio.sleep(30)
        except Exception as e:
            print("Encountered an exception while getting new results! -> {}".format(e))
            return

    async def get_new_results(self, operator_information, latest_size, tree_size):
        """Async generator yielding chunks of raw get-entries results covering
        roughly [latest_size, tree_size), MAX_BLOCK_SIZE entries at a time."""
        # The top of the tree isn't actually a cert yet, so the total_size is what we're aiming for
        total_size = tree_size - latest_size
        start = latest_size

        end = start + self.MAX_BLOCK_SIZE

        chunks = math.ceil(total_size / self.MAX_BLOCK_SIZE)

        self.logger.info("Retrieving {} certificates ({} -> {}) for {}".format(tree_size-latest_size, latest_size, tree_size, operator_information['description']))
        async with aiohttp.ClientSession(loop=self.loop) as session:
            for _ in range(chunks):
                # Cap the end to the last record in the DB
                if end >= tree_size:
                    end = tree_size - 1

                assert end >= start, "End {} is less than start {}!".format(end, start)
                assert end < tree_size, "End {} is less than tree_size {}".format(end, tree_size)

                # get-entries bounds are inclusive per RFC 6962.
                url = "https://{}/ct/v1/get-entries?start={}&end={}".format(operator_information['url'], start, end)

                async with session.get(url) as response:
                    certificates = await response.json()
                    if 'error_message' in certificates:
                        print("error!")

                    # Attach each entry's absolute log index; zip truncates if
                    # the log returned fewer entries than requested.
                    for index, cert in zip(range(start, end+1), certificates['entries']):
                        cert['index'] = index

                    yield certificates['entries']

                start += self.MAX_BLOCK_SIZE

                # NOTE(review): the "+ 1" makes subsequent windows one entry
                # wider than the first and overlap the previous window's end;
                # looks like an off-by-one -- confirm against RFC 6962
                # get-entries semantics before relying on exact coverage.
                end = start + self.MAX_BLOCK_SIZE + 1
+
+
async def mux_ctl_stream(watcher):
    """Drain the watcher's certificate queue forever, printing one line per
    certificate: source log URL, subject CN, and subjectAltName (if any)."""
    while True:
        message = await watcher.stream.get()
        leaf = message['leaf_cert']
        alt_names = leaf['extensions'].get('subjectAltName', '')
        print(message['source']['url'], leaf['subject']['CN'], alt_names)
+
+
# Standalone mode: run the watcher without the web server, printing
# certificates to stdout via mux_ctl_stream.
if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    watcher = TransparencyWatcher(loop)
    asyncio.ensure_future(asyncio.gather(*watcher.get_tasks()))
    asyncio.ensure_future(mux_ctl_stream(watcher))
    loop.run_forever()

+ 55 - 0
certstream/webserver.py

@@ -0,0 +1,55 @@
+import json
+import logging
+import os
+
+from aiohttp import web
+from aiohttp.web_urldispatcher import Response
+
+
# Minimal placeholder page served at "/"; the service's useful output is the
# JSON stats endpoint. Fixed: the original HTML never closed the <body> tag.
STATIC_INDEX = '''
<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
  </head>
  <body>
    <div id="app">Nothing to see here...</div>
  </body>
</html>
'''
+
+
class WebServer(object):
    """Small aiohttp frontend: a static placeholder index page plus a JSON
    stats endpoint backed by the TransparencyWatcher."""

    def __init__(self, _loop, transparency_watcher):
        # Path of the stats endpoint; overridable via STATS_URL so it can be
        # moved/obscured without a code change.
        self.stats_url = os.getenv("STATS_URL", 'stats')
        self.logger = logging.getLogger('certstream.webserver')

        self.loop = _loop
        self.watcher = transparency_watcher

        self.app = web.Application(loop=self.loop)

        self._add_routes()

    def run_server(self):
        """Run the aiohttp app (plain HTTP); blocks until shutdown.

        Port comes from the PORT environment variable, defaulting to 8080.
        """
        web.run_app(
            self.app,
            port=int(os.environ.get('PORT', 8080)),
            ssl_context=None
        )

    def _add_routes(self):
        # Register the stats endpoint and the root placeholder page.
        self.app.router.add_get("/{}".format(self.stats_url), self.stats_handler)
        self.app.router.add_get('/', self.root_handler)

    async def root_handler(self, request):
        """Serve the static placeholder HTML page."""
        return Response(body=STATIC_INDEX, content_type="text/html")

    async def stats_handler(self, _):
        """Return JSON with the timestamp of the last certificate seen."""
        return web.Response(
            body=json.dumps({
                    "last_seen": self.watcher.lastseen,
                }, indent=4
            ),
            content_type="application/json",
        )

+ 7 - 0
requirements.txt

@@ -0,0 +1,7 @@
+construct
+uvloop
+aiohttp
+aioprocessing
+PyOpenSSL
+websockets
+requests

+ 3 - 0
run_server.py

@@ -0,0 +1,3 @@
# Entry point script: delegates to the certstream package's run() bootstrap,
# which starts the CT-log watcher and the web server.
import certstream

certstream.run()