feat: Add Dockerfile template (#27)
leehuwuj authored Mar 28, 2024
1 parent 4f10840 commit 78ded9e
Showing 11 changed files with 209 additions and 75 deletions.
5 changes: 5 additions & 0 deletions .changeset/healthy-insects-check.md
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Add Dockerfile template
32 changes: 26 additions & 6 deletions helpers/python.ts
@@ -216,9 +216,10 @@ export const installPythonTemplate = async ({
     },
   });
 
+  const compPath = path.join(templatesDir, "components");
+
   if (dataSources.length > 0) {
     const enginePath = path.join(root, "app", "engine");
-    const compPath = path.join(templatesDir, "components");
 
     const vectorDbDirName = vectorDb ?? "none";
     const VectorDBPath = path.join(
@@ -265,7 +266,19 @@ export const installPythonTemplate = async ({
     // Generate loaders config
     // Web loader config
     if (dataSources.some((ds) => ds.type === "web")) {
-      const webLoaderConfig = dataSources
+      const webLoaderConfig = new Document({});
+
+      // Create config for browser driver arguments
+      const driverArgNodeValue = webLoaderConfig.createNode([
+        "--no-sandbox",
+        "--disable-dev-shm-usage",
+      ]);
+      driverArgNodeValue.commentBefore =
+        " The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode";
+      webLoaderConfig.set("driver_arguments", driverArgNodeValue);
+
+      // Create config for urls
+      const urlConfigs = dataSources
         .filter((ds) => ds.type === "web")
         .map((ds) => {
           const dsConfig = ds.config as WebSourceConfig;
@@ -275,13 +288,15 @@ export const installPythonTemplate = async ({
           depth: dsConfig.depth,
         };
       });
-      // Add documentation to web loader config
-      const node = loaderConfig.createNode(webLoaderConfig);
-      node.commentBefore = ` base_url: The URL to start crawling with
+      const urlConfigNode = webLoaderConfig.createNode(urlConfigs);
+      urlConfigNode.commentBefore = ` base_url: The URL to start crawling with
  prefix: Only crawl URLs matching the specified prefix
  depth: The maximum depth for BFS traversal
  You can add more websites by adding more entries (don't forget the - prefix from YAML)`;
-      loaderConfig.set("web", node);
+      webLoaderConfig.set("urls", urlConfigNode);
+
+      // Add web config to the loaders config
+      loaderConfig.set("web", webLoaderConfig);
     }
     // File loader config
     if (dataSources.some((ds) => ds.type === "file")) {
@@ -308,4 +323,9 @@ export const installPythonTemplate = async ({
   if (postInstallAction === "runApp" || postInstallAction === "dependencies") {
     installPythonDependencies();
   }
+
+  // Copy deployment files for python
+  await copy("**", root, {
+    cwd: path.join(compPath, "deployments", "python"),
+  });
 };
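The net effect: instead of writing the list of URL entries directly under the `web` key, the generator now nests a commented `driver_arguments` list and a `urls` list inside it. A sketch of the YAML this would produce (the example site is a placeholder; the keys and comments are taken from the code above, and the exact comment placement depends on the yaml library's rendering):

```yaml
web:
  # The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode
  driver_arguments:
    - "--no-sandbox"
    - "--disable-dev-shm-usage"
  # base_url: The URL to start crawling with
  # prefix: Only crawl URLs matching the specified prefix
  # depth: The maximum depth for BFS traversal
  # You can add more websites by adding more entries (don't forget the - prefix from YAML)
  urls:
    - base_url: https://www.llamaindex.ai
      prefix: https://www.llamaindex.ai
      depth: 1
```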
5 changes: 5 additions & 0 deletions helpers/typescript.ts
@@ -295,4 +295,9 @@ export const installTSTemplate = async ({
   if (postInstallAction === "runApp" || postInstallAction === "dependencies") {
     await installTSDependencies(packageJson, packageManager, isOnline);
   }
+
+  // Copy deployment files for typescript
+  await copy("**", root, {
+    cwd: path.join(compPath, "deployments", "typescript"),
+  });
 };
26 changes: 26 additions & 0 deletions templates/components/deployments/python/Dockerfile
@@ -0,0 +1,26 @@
FROM python:3.11 as build

WORKDIR /app

ENV PYTHONPATH=/app

# Install Poetry
RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=/opt/poetry python && \
cd /usr/local/bin && \
ln -s /opt/poetry/bin/poetry && \
poetry config virtualenvs.create false

# Install Chromium for web loader
# You can remove this if you don't use the web loader, to reduce the image size
RUN apt update && apt install -y chromium chromium-driver

# Install dependencies
COPY ./pyproject.toml ./poetry.lock* /app/
RUN poetry install --no-root --no-cache --only main

# ====================================
FROM build as release

COPY . .

CMD ["python", "main.py"]
22 changes: 22 additions & 0 deletions templates/components/deployments/typescript/Dockerfile
@@ -0,0 +1,22 @@
FROM node:20-alpine as build

WORKDIR /app

# Install dependencies
COPY package.json pnpm-lock.yaml* /app/
RUN npm install

# Build the application
COPY . .
RUN npm run build

# ====================================
FROM build as release

# Copy built output from the previous stage
COPY --from=build /app/.next* ./.next
COPY --from=build /app/public* ./public
COPY --from=build /app/package.json ./package.json
COPY --from=build /app/node_modules ./node_modules

CMD ["npm", "start"]
5 changes: 2 additions & 3 deletions templates/components/loaders/python/__init__.py
@@ -26,8 +26,7 @@ def get_documents():
             document = get_file_documents(FileLoaderConfig(**loader_config))
             documents.extend(document)
         elif loader_type == "web":
-            for entry in loader_config:
-                document = get_web_documents(WebLoaderConfig(**entry))
-                documents.extend(document)
+            document = get_web_documents(WebLoaderConfig(**loader_config))
+            documents.extend(document)
 
     return documents
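With this change, the `web` section of the loader config is consumed as a single mapping rather than a list of entries. Roughly, and with illustrative values only, the `loader_config` passed in now has this shape:

```python
# Illustrative shape only: before, config["web"] was a list of crawl entries
# (one WebLoaderConfig per entry); now it is a single mapping consumed whole
# by WebLoaderConfig(**loader_config), with the crawl targets nested under "urls".
loader_config = {
    "driver_arguments": ["--no-sandbox", "--disable-dev-shm-usage"],
    "urls": [
        {"base_url": "https://www.llamaindex.ai", "prefix": "https://www.llamaindex.ai"},
    ],
}
```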
29 changes: 23 additions & 6 deletions templates/components/loaders/python/web.py
@@ -3,17 +3,34 @@
 from pydantic import BaseModel, Field
 
 
-class WebLoaderConfig(BaseModel):
+class CrawlUrl(BaseModel):
     base_url: str
     prefix: str
     max_depth: int = Field(default=1, ge=0)
 
 
+class WebLoaderConfig(BaseModel):
+    driver_arguments: list[str] = Field(default=None)
+    urls: list[CrawlUrl]
+
+
 def get_web_documents(config: WebLoaderConfig):
     from llama_index.readers.web import WholeSiteReader
+    from selenium import webdriver
+    from selenium.webdriver.chrome.options import Options
+
+    options = Options()
+    driver_arguments = config.driver_arguments or []
+    for arg in driver_arguments:
+        options.add_argument(arg)
 
-    scraper = WholeSiteReader(
-        prefix=config.prefix,
-        max_depth=config.max_depth,
-    )
-    return scraper.load_data(config.base_url)
+    docs = []
+    for url in config.urls:
+        scraper = WholeSiteReader(
+            prefix=url.prefix,
+            max_depth=url.max_depth,
+            driver=webdriver.Chrome(options=options),
+        )
+        docs.extend(scraper.load_data(url.base_url))
+
+    return docs
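For reference, a minimal sketch of driving the new loader directly; the import path and URL are placeholders, and running it needs Chromium plus chromedriver (which the Python Dockerfile above installs):

```python
# Hypothetical import path; the module lives at templates/components/loaders/python/web.py
from web import CrawlUrl, WebLoaderConfig, get_web_documents

config = WebLoaderConfig(
    driver_arguments=["--no-sandbox", "--disable-dev-shm-usage"],
    urls=[
        CrawlUrl(
            base_url="https://www.llamaindex.ai",  # placeholder site
            prefix="https://www.llamaindex.ai",
            max_depth=1,
        )
    ],
)

documents = get_web_documents(config)  # one Chrome-driven BFS crawl per CrawlUrl
print(f"Loaded {len(documents)} documents")
```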
60 changes: 0 additions & 60 deletions templates/types/simple/fastapi/README-template.md

This file was deleted.

32 changes: 32 additions & 0 deletions templates/types/streaming/express/README-template.md
@@ -60,6 +60,32 @@ NODE_ENV=production npm run start

> Note that the `NODE_ENV` environment variable is set to `production`. This disables CORS for all origins.

## Using Docker

1. Build an image for the Express app:

```
docker build -t <your_backend_image_name> .
```

2. Start the app:

- Generate index data:

```
# Mount .env and config to use ENV variables and configuration from your file system;
# mount cache to persist the vector database on your file system
docker run --rm \
  -v $(pwd)/.env:/app/.env \
  -v $(pwd)/config:/app/config \
  -v $(pwd)/cache:/app/cache \
  <your_backend_image_name> \
  npm run generate
```

- Start the API:

```
# Mount .env and config to use ENV variables and configuration from your file system;
# mount cache to persist the vector database on your file system
docker run \
  -v $(pwd)/.env:/app/.env \
  -v $(pwd)/config:/app/config \
  -v $(pwd)/cache:/app/cache \
  -p 8000:8000 \
  <your_backend_image_name>
```

## Learn More

To learn more about LlamaIndex, take a look at the following resources:
33 changes: 33 additions & 0 deletions templates/types/streaming/fastapi/README-template.md
@@ -64,6 +64,39 @@ The API allows CORS for all origins to simplify development. You can change this
ENVIRONMENT=prod python main.py
```

## Using Docker

1. Build an image for the FastAPI app:

```
docker build -t <your_backend_image_name> .
```

2. Start the app:

- Generate embeddings for the index data:

```
# Mount .env and config to use ENV variables and configuration from your file system;
# mount storage to persist the vector database on your file system
docker run --rm \
  -v $(pwd)/.env:/app/.env \
  -v $(pwd)/config:/app/config \
  -v $(pwd)/storage:/app/storage \
  <your_backend_image_name> \
  python app/engine/generate.py
```

- Start the API:

```
# Mount .env and config to use ENV variables and configuration from your file system;
# mount storage to persist the vector database on your file system
docker run \
  -v $(pwd)/.env:/app/.env \
  -v $(pwd)/config:/app/config \
  -v $(pwd)/storage:/app/storage \
  -p 8000:8000 \
  <your_backend_image_name>
```

## Learn More

To learn more about LlamaIndex, take a look at the following resources:
35 changes: 35 additions & 0 deletions templates/types/streaming/nextjs/README-template.md
@@ -26,6 +26,41 @@ You can start editing the page by modifying `app/page.tsx`. The page auto-update

This project uses [`next/font`](https://nextjs.org/docs/basic-features/font-optimization) to automatically optimize and load Inter, a custom Google Font.

## Using Docker

1. Build an image for the Next.js app:

```
docker build -t <your_app_image_name> .
```

2. Generate embeddings:

Parse the data and generate the vector embeddings if the `./data` folder exists; otherwise, skip this step:

```
# Mount .env and config to use ENV variables and configuration from your file system;
# mount cache to persist the vector database on your file system
docker run --rm \
  -v $(pwd)/.env:/app/.env \
  -v $(pwd)/config:/app/config \
  -v $(pwd)/cache:/app/cache \
  -p 3000:3000 \
  <your_app_image_name> \
  npm run generate
```

3. Start the app:

```
# Mount .env and config to use ENV variables and configuration from your file system;
# mount cache to persist the vector database on your file system
docker run --rm \
  -v $(pwd)/.env:/app/.env \
  -v $(pwd)/config:/app/config \
  -v $(pwd)/cache:/app/cache \
  -p 3000:3000 \
  <your_app_image_name>
```

## Learn More

To learn more about LlamaIndex, take a look at the following resources: