Skip to content

Commit

Permalink
Git hooks, ruff, pg in docker-compose, and lots of lints (#11)
Browse files Browse the repository at this point in the history
* Use Django settings and lift settings utils from PostHog

* checkpoint

* fixes

* settings is not a module, duh

* checkpoint

* test out some ruff

* more fixes, but automated

* dev requirements

* postgresify dev

* bump pg version

* pg-ify both dockers

* caddify

* start of something pretty nice

* Caddy success

* if debug don't require DATABASE_URL

* fixes

* fixes2
  • Loading branch information
fuziontech authored Aug 15, 2023
1 parent c76f537 commit fdf3ed9
Show file tree
Hide file tree
Showing 40 changed files with 864 additions and 222 deletions.
164 changes: 163 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,168 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/


env/
__pycache__/
node_modules
housewatch.sqlite3
.DS_Store
yarn.lock
17 changes: 17 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# pre-commit configuration: generic file hygiene hooks, then ruff, then black.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.3.0
    hooks:
      - id: check-yaml
      - id: end-of-file-fixer
      - id: trailing-whitespace
  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
    rev: v0.0.275
    hooks:
      - id: ruff
        # --fix applies autofixes; --exit-non-zero-on-fix makes the hook fail
        # whenever a file was modified so the changes get re-staged.
        args: [--fix, --exit-non-zero-on-fix]
  - repo: https://github.com/psf/black
    rev: 22.10.0
    hooks:
      - id: black
16 changes: 14 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,20 @@ ENV PYTHONUNBUFFERED 1
WORKDIR /code

COPY requirements.txt ./
# Install the system build dependencies first, then the Python requirements,
# all in one layer. The apt package lists are deleted once the packages are
# installed. pip runs only after the toolchain and headers are present.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        "build-essential" \
        "git" \
        "libpq-dev" \
        "libxmlsec1" \
        "libxmlsec1-dev" \
        "libffi-dev" \
        "pkg-config" \
    && \
    rm -rf /var/lib/apt/lists/* && \
    pip install -r requirements.txt --compile --no-cache-dir


USER root

Expand All @@ -14,4 +27,3 @@ COPY housewatch housewatch/
COPY bin bin/

RUN DEBUG=1 python manage.py collectstatic --noinput

13 changes: 8 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,17 @@
To deploy HouseWatch, clone this repo and then run the following, substituting the environment variables for the relevant values of one of your ClickHouse instances:

```bash
SITE_ADDRESS=<SITE_ADDRESS> \
CLICKHOUSE_HOST=localhost \
CLICKHOUSE_CLUSTER=mycluster \
CLICKHOUSE_USER=default \
CLICKHOUSE_PASSWORD=xxxxxxxxxxx \
docker compose -f docker-compose.yml up
```

After running the above, the UI will be running on [http://localhost:3000](http://localhost:3000). For production installs, you might want to setup something like [Caddy](https://caddyserver.com/) or [NGINX](https://nginx.org/en/) with a [Let's Encrypt](https://letsencrypt.org/) TLS certificate.
`SITE_ADDRESS` here is the address that the UI will be running on. It can be a domain name or simply a port like `:80`.

After running the above, the UI will be running on the address you specified. This will be something like http://localhost if you used `:80` for your `SITE_ADDRESS` above. I would think twice about exposing this to the internet, as it is not currently secured in any way.

<details>

Expand All @@ -49,13 +52,13 @@ The following are the supported environment variables for configuring your House
- `CLICKHOUSE_VERIFY`: Optional - see [clickhouse-driver docs](https://clickhouse-driver.readthedocs.io/en/latest/index.html) for more information
- `CLICKHOUSE_CA`: Optional - see [clickhouse-driver docs](https://clickhouse-driver.readthedocs.io/en/latest/index.html) for more information
- `OPENAI_API_KEY`: Optional - enables the experimental "AI Tools" page, which currently features a natural language query editor
- `OPENAI_MODEL`: Optional - a valid OpenAI model (e.g. `gpt-3.5-turbo`, `gpt-4`) that you have access to with the key above to be used for the AI features

</details>

## 💡 Motivation

At PostHog we manage a few large ClickHouse clusters and found ourselves in need of a tool to monitor and manage these more easily.

ClickHouse is fantastic at introspection, providing a lot of metadata about the system in its system tables so that it can be easily queried. However, knowing exactly how to query and parse the available information can be a difficult task. Over the years at PostHog, we've developed great intuition for how to debug ClickHouse issues using ClickHouse, and HouseWatch is the compilation of this knowledge into a tool.

Expand All @@ -65,7 +68,7 @@ As a result, we felt it was appropriate to have these tools live in one place. U

## 🏗️ Status of the project

HouseWatch is in its early days and we have a lot more features in mind that we'd like to build into it going forward. The code could also use some cleaning up :) As of right now, it is considered Beta software and you should exercise caution when using it in production.

One potential approach is to connect HouseWatch to ClickHouse using a read-only user. In this case, the cluster management features will not work (e.g. operations, query editor), but the analysis toolset will function normally.

Expand Down Expand Up @@ -175,7 +178,7 @@ A public list of things we intend to do with HouseWatch in the near future.

<b>Cleanup</b>

- [ ] Extract README images out of repo
- [ ] Make banner subtitle work on dark mode
- [ ] Fetch data independently on the query analyzer
- [ ] Breakpoint for logs search
Expand Down
2 changes: 1 addition & 1 deletion bin/celery
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash
set -e

# Start the Celery worker for the housewatch app.
celery -A housewatch worker
7 changes: 0 additions & 7 deletions bin/lint

This file was deleted.

2 changes: 1 addition & 1 deletion bin/start
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ set -e

export DEBUG=1

# Launch ./bin/celery in the background and keep runserver in the foreground.
./bin/celery & python manage.py runserver
96 changes: 96 additions & 0 deletions docker-compose.dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Development stack: backend app and worker (built from this repo), the
# frontend container, Postgres, Redis, ClickHouse with ZooKeeper, and Caddy.
version: "3"

services:
  # Backend: applies migrations, then serves on 0.0.0.0:8000.
  app:
    build: .
    # Anchored so the worker service can reuse the same settings.
    # Boolean- and number-looking values are quoted so they are always passed
    # to the container as plain strings.
    environment: &django_env
      DEBUG: "1"
      REDIS_URL: redis://redis:6379
      DATABASE_URL: postgres://housewatch:housewatch@db:5432/housewatch
      CLICKHOUSE_HOST: clickhouse
      CLICKHOUSE_DATABASE: default
      CLICKHOUSE_USER: default
      CLICKHOUSE_PASSWORD: ""
      CLICKHOUSE_CLUSTER: parallel_replicas
      CLICKHOUSE_SECURE: "false"
      CLICKHOUSE_VERIFY: "false"
      CLICKHOUSE_CA: ""
    command:
      - bash
      - -c
      - |
        python manage.py migrate
        python manage.py runserver 0.0.0.0:8000
    volumes:
      # Mount the working tree over /code.
      - .:/code
    ports:
      - "8000:8000"
    depends_on:
      - clickhouse
      - db
      - redis

  # Frontend container built from ./frontend/Dockerfile.dev.
  web:
    build:
      context: ./frontend
      dockerfile: Dockerfile.dev
    volumes:
      - ./frontend/public:/frontend/public
      - ./frontend/src:/frontend/src
    ports:
      - "3000:3000"

  # Postgres database referenced by DATABASE_URL above.
  db:
    image: postgres:14-alpine
    restart: on-failure
    environment:
      POSTGRES_USER: housewatch
      POSTGRES_DB: housewatch
      POSTGRES_PASSWORD: housewatch
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U housewatch"]
      interval: 5s
      timeout: 5s

  # Redis, published on host port 6388, capped at 200mb with LRU eviction.
  redis:
    image: redis:6.2.7-alpine
    restart: on-failure
    ports:
      - "6388:6379"
    command: redis-server --maxmemory-policy allkeys-lru --maxmemory 200mb

  # Runs ./bin/celery with the same environment as the app service.
  worker:
    build: .
    environment:
      <<: *django_env
    command:
      - ./bin/celery
    volumes:
      - .:/code
    depends_on:
      - clickhouse
      - db
      - redis

  # ClickHouse server; the image can be overridden via CLICKHOUSE_SERVER_IMAGE.
  clickhouse:
    image: ${CLICKHOUSE_SERVER_IMAGE:-clickhouse/clickhouse-server:23.4.2.11}
    restart: on-failure
    depends_on:
      - zookeeper

  zookeeper:
    image: zookeeper:3.7.0
    restart: on-failure

  # Caddy, listening on :8888 and configured by ./docker/Caddyfile.
  caddy:
    image: caddy:2.6.1
    ports:
      - "8888:8888"
    environment:
      SITE_ADDRESS: ":8888"
    volumes:
      - ./docker/Caddyfile:/etc/caddy/Caddyfile
    depends_on:
      - web
      - app
Loading

0 comments on commit fdf3ed9

Please sign in to comment.