1-在本机Docker开发环境部署数据湖
本文描述了如何在本地开发环境部署 LibianDatalake 数据湖。
1. 创建环境变量文件
第一步,先创建 .env 文件。它之后会被 shell 命令与 docker-compose.yml 读取。
WARNING
⚠️ 以下 .env 文件中的值仅供开发环境使用,切勿用于生产环境。
bash
# Write the development-only defaults to ./.env, replacing any existing file.
# The heredoc delimiter is quoted, so the shell expands nothing inside it.
# The empty first and last lines are part of the generated file.
cat > .env <<'DOTENV'

# Copy this file and rename to .env
POSTGRES_HOSTNAME=postgres.libian-datalake
POSTGRES_DB=libian-datalake
POSTGRES_USERNAME=postgres
POSTGRES_PASSWORD=libian-datalake-dev-password
PGADMIN_MY_EMAIL=pgadmin-libian-datalake@example.com
PGADMIN_MY_PASSWORD=libian-datalake-dev-password
NC_PUBLIC_URL=http://nocodb.libian-datalake
NC_ADMIN_EMAIL=ncadmin-libian-datalake@example.com
NC_ADMIN_PASSWORD=libian-datalake-dev-password
#SMTP_SERVER=smtp.gmail.com
#SMTP_PORT=465
#SMTP_SSL_PY_BOOL=True
#SMTP_SSL_JS_BOOL=true
#SMTP_USERNAME=smtpxxxxsmtp
#SMTP_PASSWORD=smtpxxxxsmtp
#SMTP_SENDER=smtpxxxxsmtp@gmail.com
#
# The internal address( Schema + Hostname + Port )
# should be same as the public address.
#
# Because nocodb backend uses the internal address,
# and the internal address will be exposed in the nocodb table data.
#
MINIOSNSD_HOSTNAME=miniosnsd.libian-datalake
# The public network port is the same as the port in the container,
# (but the mapped port has nothing to do with it)
MINIOSNSD_BOTH_PORT=80
MINIO_ROOT_USER=myminioadmin
MINIO_ROOT_PASSWORD=libian-datalake-dev-password
MINIO_BROWSER_REDIRECT_URL=http://minioconsole.libian-datalake

DOTENV
.dotenv
# Copy this file and rename to .env
# --- PostgreSQL connection settings ---
POSTGRES_HOSTNAME=postgres.libian-datalake
POSTGRES_DB=libian-datalake
POSTGRES_USERNAME=postgres
POSTGRES_PASSWORD=libian-datalake-dev-password
# --- pgAdmin login ---
PGADMIN_MY_EMAIL=pgadmin-libian-datalake@example.com
PGADMIN_MY_PASSWORD=libian-datalake-dev-password
# --- NocoDB public URL and admin login ---
NC_PUBLIC_URL=http://nocodb.libian-datalake
NC_ADMIN_EMAIL=ncadmin-libian-datalake@example.com
NC_ADMIN_PASSWORD=libian-datalake-dev-password
# --- Optional SMTP settings (commented out; remove the leading '#' to set them) ---
#SMTP_SERVER=smtp.gmail.com
#SMTP_PORT=465
#SMTP_SSL_PY_BOOL=True
#SMTP_SSL_JS_BOOL=true
#SMTP_USERNAME=smtpxxxxsmtp
#SMTP_PASSWORD=smtpxxxxsmtp
#SMTP_SENDER=smtpxxxxsmtp@gmail.com
#
# The internal address (scheme + hostname + port)
# should be the same as the public address,
#
# because the nocodb backend uses the internal address,
# and the internal address will be exposed in the nocodb table data.
#
MINIOSNSD_HOSTNAME=miniosnsd.libian-datalake
# The public network port is the same as the port inside the container
# (the host port it is mapped to is unrelated).
MINIOSNSD_BOTH_PORT=80
# --- MinIO root credentials and console redirect URL ---
MINIO_ROOT_USER=myminioadmin
MINIO_ROOT_PASSWORD=libian-datalake-dev-password
MINIO_BROWSER_REDIRECT_URL=http://minioconsole.libian-datalake
然后使用以下命令读入并检查 .env 中的环境变量。
bash
# If .env exists, export every line that does not start with '#' into the
# current shell; then print one variable as a quick sanity check.
# The command substitution is left unquoted on purpose: each KEY=VALUE word
# must reach `export` as a separate argument.
if [ -f .env ]; then
  export $(grep -v '^#' .env | xargs)
fi && echo "$POSTGRES_HOSTNAME"
shell
# Export the variables defined in .env into the current shell, then print one
# of them as a quick sanity check.
# Comment lines are filtered out first. Without the filter, every word of every
# '#' comment in .env is passed to `export`, which rejects words such as '#'
# or '.env' as invalid identifiers and returns non-zero, so the `&& echo`
# check never runs.
# The command substitution is left unquoted on purpose: each KEY=VALUE word
# must reach `export` as a separate argument.
export $(grep -v '^#' .env | xargs) && echo "$POSTGRES_HOSTNAME"
2. 创建 MinIO 配置文件
MinIO 的单节点单磁盘(SNSD, Single Node Single Drive)部署模式需要通过配置文件来指定运行环境参数。为此,我们需要创建并导入必要的环境变量。
根据 MinIO 官方文档,请按照以下步骤操作:
bash
# Generate ./minio.config.env from the MINIO_ROOT_* variables, print it,
# restrict it to the owner (mode 600) and list the directory.
# The heredoc delimiter is unquoted so $MINIO_ROOT_USER / $MINIO_ROOT_PASSWORD
# are expanded; the empty first and last lines are part of the generated file.
if [ -f .env ]; then
  export $(grep -v '^#' .env | xargs)
fi &&
cat > ./minio.config.env <<EOF &&

# MINIO_ROOT_USER and MINIO_ROOT_PASSWORD sets the root account for the MinIO server.
# This user has unrestricted permissions to perform S3 and administrative API operations on any resource in the deployment.
# Omit to use the default values 'minioadmin:minioadmin'.
# MinIO recommends setting non-default values as a best practice, regardless of environment
MINIO_ROOT_USER=$MINIO_ROOT_USER
MINIO_ROOT_PASSWORD=$MINIO_ROOT_PASSWORD
# MINIO_VOLUMES sets the storage volume or path to use for the MinIO server.
MINIO_VOLUMES="/mnt/data"

EOF
cat ./minio.config.env &&
chmod 600 ./minio.config.env &&
ls -la
3. 创建 pgadmin4 数据目录并设置用户权限
为了确保pgAdmin4容器能够正确运行并管理数据库,需要在启动容器之前,预先创建必要的存储卷目录,并设置正确的用户和权限。
为什么需要预先创建存储卷
pgAdmin4容器通常以非root用户身份运行(例如UID:5050),因此宿主机上的存储卷目录必须与容器内部的用户环境保持一致,以确保容器内的进程能够正确访问和修改这些目录下的文件。如果不预先设置正确的用户组和权限,可能会导致容器无法写入数据或出现其他权限相关的问题。
可参考 pgAdmin 官方文档 了解更多。
bash
# Prepare the host-side directories and config files for pgAdmin:
#   1. load .env (if present) and echo two variables as a sanity check;
#   2. create ./volume/pgadmin_data and ./volume/pgadmin_config;
#   3. write servers.json, pgpass and config_local.py from the POSTGRES_* values;
#   4. hand everything to UID:GID 5050:5050 (chown normally requires root);
#   5. print the generated files, set their modes and list the result.
# Every step is chained with &&, so the first failure stops the sequence.
# Heredoc delimiters are unquoted so the $POSTGRES_* variables are expanded.
if [ -f .env ]; then
  export $(grep -v '^#' .env | xargs)
fi &&
echo "$POSTGRES_HOSTNAME" &&
echo "$PGADMIN_MY_EMAIL" &&
mkdir -p ./volume/pgadmin_data &&
mkdir -p ./volume/pgadmin_config &&
cat > ./volume/pgadmin_config/servers.json <<EOF &&
{
  "Servers": {
    "1": {
      "Name": "Datalake Postgres",
      "Group": "Libian",
      "Username": "$POSTGRES_USERNAME",
      "Host": "$POSTGRES_HOSTNAME",
      "Port": 5432,
      "SSLMode": "disable",
      "MaintenanceDB": "$POSTGRES_DB",
      "PassFile": "~/.pgpass"
    }
  }
}
EOF
printf '%s:5432:*:%s:%s\n' "$POSTGRES_HOSTNAME" "$POSTGRES_USERNAME" "$POSTGRES_PASSWORD" \
  > ./volume/pgadmin_config/pgpass &&
cat > ./volume/pgadmin_config/config_local.py <<EOF &&
import logging
# Switch between server and desktop mode
SERVER_MODE = True
#Change pgAdmin config DB path
CONFIG_DATABASE_URI='postgresql://$POSTGRES_USERNAME:$POSTGRES_PASSWORD@$POSTGRES_HOSTNAME:5432/$POSTGRES_DB?application_name=libian-datalake-pgadmin-config&sslmode=disable'
# Change log level
CONSOLE_LOG_LEVEL = logging.INFO
FILE_LOG_LEVEL = logging.INFO

EOF
chown -R 5050:5050 ./volume/pgadmin_data &&
chown -R 5050:5050 ./volume/pgadmin_config &&
chown -R 5050:5050 ./volume/pgadmin_config/servers.json &&
chown -R 5050:5050 ./volume/pgadmin_config/pgpass &&
chown -R 5050:5050 ./volume/pgadmin_config/config_local.py &&
cat ./volume/pgadmin_config/servers.json &&
cat ./volume/pgadmin_config/pgpass &&
cat ./volume/pgadmin_config/config_local.py &&
chmod 644 ./volume/pgadmin_config/servers.json &&
chmod 644 ./volume/pgadmin_config/config_local.py &&
chmod 600 ./volume/pgadmin_config/pgpass &&
ls -la volume/*
4. Docker Compose 部署
4.1 创建 docker-compose.yml 文件
第四步,创建 docker-compose.yml。
shell
# Write ./docker-compose.yml, replacing any existing file.
# The heredoc delimiter is quoted, so ${...} and $$ reach the file literally and
# are interpolated later by Docker Compose (from .env), not by this shell.
# YAML nesting is significant: services sit under "services:", and each
# service's keys are indented one level below the service name.
cat > docker-compose.yml <<'COMPOSE_YAML'
name: "libian-datalake"
# Shared environment entries; each service merges them in with "<<: *env".
x-env: &env
  GENERIC_TIMEZONE: Asia/Shanghai
  TZ: Asia/Shanghai
services:
  postgres-db:
    image: postgres:17
    restart: always
    hostname: ${POSTGRES_HOSTNAME}
    env_file:
      - .env
    ports:
      - "18191:5432"
    environment:
      POSTGRES_DB: ${POSTGRES_DB}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_USER: ${POSTGRES_USERNAME}
      PGSSLMODE: disable
      POSTGRES_HOST_AUTH_METHOD: md5
      <<: *env
    healthcheck:
      interval: 10s
      retries: 10
      # "$$" is an escaped "$": the variables are resolved inside the container.
      test: "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""
      timeout: 2s
    volumes:
      - ./volume/db_data:/var/lib/postgresql/data
    command:
      - "-c"
      - "ssl=off"
  miniosnsd:
    image: "minio/minio:latest"
    # The internal domain name and port should be
    # the same as the public domain name and port.
    #
    # Because nocodb backend uses the internal port,
    # and the internal port will be exposed in the nocodb table data.
    hostname: ${MINIOSNSD_HOSTNAME}
    restart: always
    env_file:
      - .env
    ports:
      - "18194:${MINIOSNSD_BOTH_PORT}"
      - "18195:9001"
    environment:
      MINIO_CONFIG_ENV_FILE: /etc/config.env
      MINIO_BROWSER_REDIRECT_URL: ${MINIO_BROWSER_REDIRECT_URL}
      <<: *env
    volumes:
      - "./minio.config.env:/etc/config.env"
      - "./volume/miniosnsd_data:/mnt/data"
    healthcheck:
      test: ["CMD", "curl", "-k", "--silent", "--fail", "http://localhost:9001"]
      interval: 1m
      timeout: 20s
      retries: 5
      start_period: 5m
      start_interval: 30s
    command:
      - "server"
      - "--address"
      - ":${MINIOSNSD_BOTH_PORT}"
      - "--console-address"
      - ":9001"
  nocodb:
    image: "nocodb/nocodb:latest"
    restart: always
    ports:
      - "18193:8080"
    env_file:
      - .env
    # Start only after the database and the object store report healthy.
    depends_on:
      postgres-db:
        condition: service_healthy
      miniosnsd:
        condition: service_healthy
    environment:
      NC_DB: "pg://${POSTGRES_HOSTNAME}:5432?u=${POSTGRES_USERNAME}&p=${POSTGRES_PASSWORD}&d=${POSTGRES_DB}&application_name=libian-datalake-nocodb&sslmode=disable"
      NC_PUBLIC_URL: ${NC_PUBLIC_URL}
      NC_ADMIN_EMAIL: ${NC_ADMIN_EMAIL}
      NC_ADMIN_PASSWORD: ${NC_ADMIN_PASSWORD}
      # Booleans are quoted so the YAML parser keeps them as strings.
      NC_INVITE_ONLY_SIGNUP: "true"
      NC_DISABLE_TELE: "true"
      NC_SMTP_FROM: ${SMTP_SENDER}
      NC_SMTP_HOST: ${SMTP_SERVER}
      NC_SMTP_PORT: ${SMTP_PORT}
      NC_SMTP_USERNAME: ${SMTP_USERNAME}
      NC_SMTP_PASSWORD: ${SMTP_PASSWORD}
      NC_SMTP_SECURE: ${SMTP_SSL_JS_BOOL}
      <<: *env
    volumes:
      - "./volume/nc_data:/usr/app/data"
    healthcheck:
      test: ["CMD", "wget", "--spider", "http://localhost:8080/favicon.ico"]
      interval: 1m
      timeout: 20s
      retries: 5
      start_period: 5m
      start_interval: 30s
  pgadmin:
    env_file:
      - .env
    ports:
      - "18192:80"
    depends_on:
      nocodb:
        condition: service_healthy
    image: dpage/pgadmin4:9.1
    restart: always
    environment:
      PGADMIN_DEFAULT_EMAIL: ${PGADMIN_MY_EMAIL}
      PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_MY_PASSWORD}
      PGADMIN_CONFIG_WTF_CSRF_ENABLED: "False"
      PGADMIN_CONFIG_WTF_CSRF_CHECK_DEFAULT: "False"
      PGADMIN_CONFIG_ENHANCED_COOKIE_PROTECTION: "False"
      PGADMIN_CONFIG_MAX_LOGIN_ATTEMPTS: 15
      PGADMIN_CONFIG_CHECK_EMAIL_DELIVERABILITY: "False"
      <<: *env
    # The three config files must already exist under ./volume/pgadmin_config.
    volumes:
      - "./volume/pgadmin_data:/var/lib/pgadmin"
      - "./volume/pgadmin_config/servers.json:/pgadmin4/servers.json"
      - "./volume/pgadmin_config/pgpass:/home/pgadmin/.pgpass"
      - "./volume/pgadmin_config/config_local.py:/pgadmin4/config_local.py"
    healthcheck:
      test: ["CMD", "wget", "--spider", "http://localhost:80"]
      interval: 1m
      timeout: 20s
      retries: 5
      start_period: 30m
      start_interval: 30s
COMPOSE_YAML
yml
name: "libian-datalake"
# Shared environment entries; each service merges them in with "<<: *env".
x-env: &env
  GENERIC_TIMEZONE: Asia/Shanghai
  TZ: Asia/Shanghai
services:
  postgres-db:
    image: postgres:17
    restart: always
    hostname: ${POSTGRES_HOSTNAME}
    env_file:
      - .env
    ports:
      - "18191:5432"
    environment:
      POSTGRES_DB: ${POSTGRES_DB}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_USER: ${POSTGRES_USERNAME}
      PGSSLMODE: disable
      POSTGRES_HOST_AUTH_METHOD: md5
      <<: *env
    healthcheck:
      interval: 10s
      retries: 10
      # "$$" is an escaped "$": the variables are resolved inside the container.
      test: "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""
      timeout: 2s
    volumes:
      - ./volume/db_data:/var/lib/postgresql/data
    command:
      - "-c"
      - "ssl=off"
  miniosnsd:
    image: "minio/minio:latest"
    # The internal domain name and port should be
    # the same as the public domain name and port.
    #
    # Because nocodb backend uses the internal port,
    # and the internal port will be exposed in the nocodb table data.
    hostname: ${MINIOSNSD_HOSTNAME}
    restart: always
    env_file:
      - .env
    ports:
      - "18194:${MINIOSNSD_BOTH_PORT}"
      - "18195:9001"
    environment:
      MINIO_CONFIG_ENV_FILE: /etc/config.env
      MINIO_BROWSER_REDIRECT_URL: ${MINIO_BROWSER_REDIRECT_URL}
      <<: *env
    volumes:
      - "./minio.config.env:/etc/config.env"
      - "./volume/miniosnsd_data:/mnt/data"
    healthcheck:
      test: ["CMD", "curl", "-k", "--silent", "--fail", "http://localhost:9001"]
      interval: 1m
      timeout: 20s
      retries: 5
      start_period: 5m
      start_interval: 30s
    command:
      - "server"
      - "--address"
      - ":${MINIOSNSD_BOTH_PORT}"
      - "--console-address"
      - ":9001"
  nocodb:
    image: "nocodb/nocodb:latest"
    restart: always
    ports:
      - "18193:8080"
    env_file:
      - .env
    # Start only after the database and the object store report healthy.
    depends_on:
      postgres-db:
        condition: service_healthy
      miniosnsd:
        condition: service_healthy
    environment:
      NC_DB: "pg://${POSTGRES_HOSTNAME}:5432?u=${POSTGRES_USERNAME}&p=${POSTGRES_PASSWORD}&d=${POSTGRES_DB}&application_name=libian-datalake-nocodb&sslmode=disable"
      NC_PUBLIC_URL: ${NC_PUBLIC_URL}
      NC_ADMIN_EMAIL: ${NC_ADMIN_EMAIL}
      NC_ADMIN_PASSWORD: ${NC_ADMIN_PASSWORD}
      # Booleans are quoted so the YAML parser keeps them as strings.
      NC_INVITE_ONLY_SIGNUP: "true"
      NC_DISABLE_TELE: "true"
      NC_SMTP_FROM: ${SMTP_SENDER}
      NC_SMTP_HOST: ${SMTP_SERVER}
      NC_SMTP_PORT: ${SMTP_PORT}
      NC_SMTP_USERNAME: ${SMTP_USERNAME}
      NC_SMTP_PASSWORD: ${SMTP_PASSWORD}
      NC_SMTP_SECURE: ${SMTP_SSL_JS_BOOL}
      <<: *env
    volumes:
      - "./volume/nc_data:/usr/app/data"
    healthcheck:
      test: ["CMD", "wget", "--spider", "http://localhost:8080/favicon.ico"]
      interval: 1m
      timeout: 20s
      retries: 5
      start_period: 5m
      start_interval: 30s
  pgadmin:
    env_file:
      - .env
    ports:
      - "18192:80"
    depends_on:
      nocodb:
        condition: service_healthy
    image: dpage/pgadmin4:9.1
    restart: always
    environment:
      PGADMIN_DEFAULT_EMAIL: ${PGADMIN_MY_EMAIL}
      PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_MY_PASSWORD}
      PGADMIN_CONFIG_WTF_CSRF_ENABLED: "False"
      PGADMIN_CONFIG_WTF_CSRF_CHECK_DEFAULT: "False"
      PGADMIN_CONFIG_ENHANCED_COOKIE_PROTECTION: "False"
      PGADMIN_CONFIG_MAX_LOGIN_ATTEMPTS: 15
      PGADMIN_CONFIG_CHECK_EMAIL_DELIVERABILITY: "False"
      <<: *env
    # The three config files must already exist under ./volume/pgadmin_config.
    volumes:
      - "./volume/pgadmin_data:/var/lib/pgadmin"
      - "./volume/pgadmin_config/servers.json:/pgadmin4/servers.json"
      - "./volume/pgadmin_config/pgpass:/home/pgadmin/.pgpass"
      - "./volume/pgadmin_config/config_local.py:/pgadmin4/config_local.py"
    healthcheck:
      test: ["CMD", "wget", "--spider", "http://localhost:80"]
      interval: 1m
      timeout: 20s
      retries: 5
      start_period: 30m
      start_interval: 30s
4.2 运行
运行此命令以部署:
shell
# Run from the directory that contains docker-compose.yml, .env,
# minio.config.env and ./volume (the compose file refers to them by relative path).
docker compose up
5. 最后
服务 | 地址 | 管理员账号 | 管理员密码 | 备注 |
---|---|---|---|---|
postgres | localhost:18191 | postgres | libian-datalake-dev-password | 登录时语言一定要选 English |
pgadmin | http://localhost:18192 | pgadmin-libian-datalake@example.com | libian-datalake-dev-password | |
nocodb | http://localhost:18193 | ncadmin-libian-datalake@example.com | libian-datalake-dev-password | 需要参照 First-Init 配置 postgres 数据源。 |
minio | localhost:18194 | | | 需去控制台配置 access token |
minio 控制台 | http://localhost:18195 | myminioadmin | libian-datalake-dev-password | |