CentOS 7.9 + ceph-ansible: Installing Ceph Octopus

I. Deployment Information

  • Ceph version: Octopus / 15.2.15
  • ceph-ansible version: origin/stable-5.0
  • CentOS 7.9 (2009) with a 5.15 kernel
  • Every node has two bonded VLAN interfaces: bond0.2049 for the public (service) network and bond1.2060 for the cluster network (see the quick check below)
  • Every OSD node has 12 × 2.2 TB raw disks
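A quick sanity check, as a sketch assuming the VLAN sub-interfaces are already configured on every node: both interfaces should be UP and carry their expected addresses.

# Run on each node; expect state UP with the 10.10.0.x/22 and 10.60.0.x/22 addresses
ip -br addr show bond0.2049
ip -br addr show bond1.2060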

II. Node Plan

Public IP (service)   Gateway       Cluster IP       Hostname      Roles
10.10.0.1/22          10.10.3.254   10.60.0.1/22     ceph-mgr01    mgr,mon,osd,rgw,rbd,cephfs,mds
10.10.0.2/22          10.10.3.254   10.60.0.2/22     ceph-mgr02    mgr,mon,osd,rgw,rbd,cephfs,mds
10.10.0.3/22          10.10.3.254   10.60.0.3/22     ceph-mgr03    mgr,mon,osd,rgw,rbd,cephfs,mds
10.10.0.4/22          10.10.3.254   10.60.0.4/22     ceph-node01   osd,rgw,rbd,cephfs
10.10.0.5/22          10.10.3.254   10.60.0.5/22     ceph-node02   osd,rgw,rbd,cephfs
10.10.0.6/22          10.10.3.254   10.60.0.6/22     ceph-node03   osd,rgw,rbd,cephfs
10.10.0.7/22          10.10.3.254   10.60.0.7/22     ceph-node04   osd,rgw,rbd,cephfs
10.10.0.8/22          10.10.3.254   10.60.0.8/22     ceph-node05   osd,rgw,rbd,cephfs
10.10.0.9/22          10.10.3.254   10.60.0.9/22     ceph-node06   osd,rgw,rbd,cephfs
10.10.0.10/22         10.10.3.254   10.60.0.10/22    ceph-node07   osd,rgw,rbd,cephfs
10.10.0.250/22 (VIP)  10.10.3.254   -                ceph-mgr01-03 + ceph-node01-07   keepalived,haproxy

III. Deployment

1. Basic preparation

# Disable SELinux (run on all nodes)
setenforce 0
sed -i -e 's/SELINUX=.*/SELINUX=disabled/g' /etc/selinux/config

# Stop firewalld and NetworkManager (run on all nodes)
systemctl disable --now firewalld
systemctl disable --now NetworkManager
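An optional verification sketch to confirm the changes took effect:

# Expect "Permissive" now ("Disabled" after a reboot), and firewalld disabled/inactive
getenforce
systemctl is-enabled firewalld
systemctl is-active firewalld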


# Populate /etc/hosts (run on all nodes)
cat > /etc/hosts << HOSTS
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
10.10.0.1  ceph1
10.10.0.2  ceph2
10.10.0.3  ceph3
10.10.0.4  ceph4
10.10.0.5  ceph5
10.10.0.6  ceph6
10.10.0.7  ceph7
10.10.0.8  ceph8
10.10.0.9  ceph9
10.10.0.10 ceph10
10.10.0.250 oss.xuhandsome.org # RGW bucket access endpoint (VIP)
HOSTS
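Before moving on, it is worth confirming that every short name resolves and answers; a minimal check, assuming /etc/hosts was written identically on all nodes:

# Each host should report ok
for i in {1..10}; do ping -c1 -W1 ceph$i >/dev/null && echo "ceph$i ok" || echo "ceph$i FAILED"; done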


# Install base RPM packages (run on all nodes)
yum install -y -q epel-release
yum install -y -q python3 python3-pip python36-PyYAML python36-six python36-netaddr
pip3 install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple
python3 -m pip install ansible==2.9 -i https://pypi.tuna.tsinghua.edu.cn/simple
python3 -m pip install six pecan werkzeug -i https://pypi.tuna.tsinghua.edu.cn/simple
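ceph-ansible stable-5.0 targets Ansible 2.9, so confirm the right version was installed:

# Should report ansible 2.9.x running under python3
ansible --version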


# Set up passwordless SSH (run on the deploy node)
ssh-keygen
for i in {1..10};do ssh-copy-id ceph$i;done
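A quick check that key-based login works everywhere (BatchMode fails fast instead of prompting for a password):

# Each node should print its hostname without a password prompt
for i in {1..10}; do ssh -o BatchMode=yes ceph$i hostname; done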

# Fetch the ceph-ansible source (run on the deploy node)
git clone -b stable-5.0 https://github.com/ceph/ceph-ansible.git
cd ceph-ansible
cp site.yml.sample site.yml

2. Prepare the deployment configuration files

ceph-ansible/group_vars/all.yml

cat ceph-ansible/group_vars/all.yml | grep -vE '(^#|^$)'
---
dummy:
configure_firewall: False
ceph_origin: repository
ceph_repository: community
ceph_mirror: http://mirrors.163.com/ceph
ceph_stable_key: "{{ ceph_mirror }}/keys/release.asc"
ceph_stable_release: octopus
ceph_stable_repo: "{{ ceph_mirror }}/rpm-{{ ceph_stable_release }}"
cephx: true
monitor_interface: bond0.2049
ip_version: ipv4
pg_autoscale_mode: True
public_network: 10.10.0.0/22
cluster_network: 10.60.0.0/22
osd_objectstore: bluestore
osd_auto_discovery: true
radosgw_civetweb_port: 8080
radosgw_interface: bond0.2049
radosgw_address: "{{ inventory_hostname }}"
rgw_multisite: true
rgw_multisite_proto: http
radosgw_frontend_port: 8081
rgw_instances:
  - instance_name: xuhandsome
    rgw_zone: huishan
    rgw_zonemaster: true
    rgw_zonesecondary: false
    rgw_zonegroup: jiangsu # should be set by the user
    rgw_zonegroupmaster: true
    rgw_zone_user: root
    rgw_zone_user_display_name: "Root"
    rgw_realm: cn # should be set by the user
    rgw_multisite_proto: "{{ rgw_multisite_proto }}"
    radosgw_address: "{{ radosgw_address }}"
    radosgw_frontend_port: "{{ radosgw_frontend_port }}"
    system_access_key: p9wq0vsnRi7sTSz0Ls6L # should be re-created by the user
    system_secret_key: 4w4I3bOX0bFZ6YqCWOoDhJ03eTHIAUMohf6wDZC6 # should be re-created by the user
    # Multi-site remote pull URL variables
    #rgw_pull_port: "{{ radosgw_frontend_port }}"
    #rgw_pull_proto: "http" # should be the same as rgw_multisite_proto for the master zone cluster
    #rgw_pullhost: localhost # rgw_pullhost only needs to be declared if there is a zone secondary.
dashboard_enabled: false
grafana_admin_password: admin

ceph-ansible/group_vars/osds.yml

---
dummy:
devices:
  - /dev/sdb
  - /dev/sdc
  - /dev/sdd
  - /dev/sde
  - /dev/sdf
  - /dev/sdg
  - /dev/sdh
  - /dev/sdi
  - /dev/sdj
  - /dev/sdk
  - /dev/sdl
  - /dev/sdm
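ceph-ansible expects these devices to be raw and free of old signatures. A pre-flight sketch, run on each OSD node; the wipefs step is destructive and assumes the 12 disks hold no data you need:

# Inspect, then clear any leftover filesystem/RAID signatures (DESTRUCTIVE)
for d in /dev/sd{b..m}; do
  lsblk "$d"
  wipefs --all "$d"
done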

ceph-ansible/group_vars/mgrs.yml

cat ceph-ansible/group_vars/mgrs.yml | grep -vE '(^#|^$)'
---
dummy:
copy_admin_key: true

ceph-ansible/group_vars/rgwloadbalancers.yml

cat ceph-ansible/group_vars/rgwloadbalancers.yml | grep -vE '(^#|^$)'
---
dummy:
haproxy_frontend_port: 80
haproxy_frontend_ssl_port: 443
haproxy_ssl_dh_param: 4096
haproxy_ssl_ciphers:
 - EECDH+AESGCM
 - EDH+AESGCM
haproxy_ssl_options:
 - no-sslv3
 - no-tlsv10
 - no-tlsv11
 - no-tls-tickets
virtual_ips:
  - 10.10.0.250
virtual_ip_netmask: 22
virtual_ip_interface: bond0.2049

ceph-ansible/hosts

cat ceph-ansible/hosts
[mons]
10.10.0.1
10.10.0.2
10.10.0.3

[mgrs]
10.10.0.1
10.10.0.2
10.10.0.3

[mdss]
10.10.0.1
10.10.0.2
10.10.0.3

[osds]
10.10.0.1
10.10.0.2
10.10.0.3
10.10.0.4
10.10.0.5
10.10.0.6
10.10.0.7
10.10.0.8
10.10.0.9
10.10.0.10

[rgws]
10.10.0.1
10.10.0.2
10.10.0.3
10.10.0.4
10.10.0.5
10.10.0.6
10.10.0.7
10.10.0.8
10.10.0.9
10.10.0.10

[rbdmirrors]
10.10.0.1
10.10.0.2
10.10.0.3
10.10.0.4
10.10.0.5
10.10.0.6
10.10.0.7
10.10.0.8
10.10.0.9
10.10.0.10

[nfss]
10.10.0.1
10.10.0.2
10.10.0.3
10.10.0.4
10.10.0.5
10.10.0.6
10.10.0.7
10.10.0.8
10.10.0.9
10.10.0.10

[grafana-server]
10.10.0.1

[clients]
10.10.0.1
10.10.0.2
10.10.0.3

[rgwloadbalancers]
10.10.0.1
10.10.0.2
10.10.0.3
10.10.0.4
10.10.0.5
10.10.0.6
10.10.0.7
10.10.0.8
10.10.0.9
10.10.0.10
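Before running the playbook, confirm Ansible can reach every host in the inventory:

# Every host should answer "pong"
ansible -i hosts all -m ping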

3. Run the playbook

cd ceph-ansible
ansible-playbook -i hosts site.yml
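The run takes a while across ten nodes. If a task fails, the playbook is generally safe to re-run after fixing the cause, and a re-run can be limited to one group, for example:

# Re-run only against the OSD group
ansible-playbook -i hosts site.yml --limit osds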

IV. Post-deployment Verification

1. Cluster health and handling common WARNs

ceph -s
  cluster:
    id:     6d380cdd-3d1b-4400-a718-ad6d43ec5edd
    health: HEALTH_OK

  services:
    mon:        3 daemons, quorum Ceph-01,Ceph-02,Ceph-03 (age 47h)
    mgr:        Ceph-03(active, since 43h), standbys: Ceph-02, Ceph-01
    mds:        cephfs:1 {0=Ceph-01=up:active} 2 up:standby
    osd:        108 osds: 108 up (since 2d), 108 in (since 3d)
    rbd-mirror: 9 daemons active (65808, 69285, 69303, 69312, 75487, 78568, 78604, 85223, 88472)
    rgw:        18 daemons active (Ceph-01.xuhandsome, Ceph-01.rgw0, Ceph-02.xuhandsome, Ceph-02.rgw0, Ceph-03.xuhandsome, Ceph-03.rgw0, Ceph-04.xuhandsome, Ceph-04.rgw0, Ceph-05.xuhandsome, Ceph-05.rgw0, Ceph-06.xuhandsome, Ceph-06.rgw0, Ceph-07.xuhandsome, Ceph-07.rgw0, Ceph-09.xuhandsome, Ceph-09.rgw0, Ceph-10.xuhandsome, Ceph-10.rgw0)
    rgw-nfs:    9 daemons active (Ceph-01, Ceph-02, Ceph-03, Ceph-04, Ceph-05, Ceph-06, Ceph-07, Ceph-09, Ceph-10)

  task status:

  data:
    pools:   16 pools, 385 pgs
    objects: 825 objects, 634 MiB
    usage:   118 GiB used, 236 TiB / 236 TiB avail
    pgs:     385 active+clean

a. HEALTH_WARN: clock skew detected on mon.ceph-02

This warning means the monitors' clocks have drifted apart. ceph-ansible already installed the chrony client during deployment, so reconfigure it to sync against the Alibaba Cloud NTP servers:

  1. Edit the configuration file
cat /etc/chrony.conf |grep -vE '(^#|^$)'
server ntp.aliyun.com iburst
server ntp1.aliyun.com iburst
server ntp2.aliyun.com iburst
server ntp3.aliyun.com iburst
server ntp4.aliyun.com iburst
server ntp5.aliyun.com iburst
server ntp6.aliyun.com iburst
server ntp7.aliyun.com iburst
server ntp.cloud.aliyuncs.com iburst
server ntp7.cloud.aliyuncs.com iburst
server ntp8.cloud.aliyuncs.com iburst
server ntp9.cloud.aliyuncs.com iburst
server ntp10.cloud.aliyuncs.com iburst
server ntp11.cloud.aliyuncs.com iburst
server ntp12.cloud.aliyuncs.com iburst
driftfile /var/lib/chrony/drift
makestep 1.0 3
rtcsync
stratumweight 0
allow 0.0.0.0/0
logdir /var/log/chrony
logchange 1
  2. Restart the chronyd service
systemctl restart chronyd
chronyc sources
  3. Restart the ceph-mon services
systemctl restart ceph-mon@{nodename}   # restart a single monitor, or:
systemctl restart ceph-mon.target       # restart all local ceph-mon units
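After the restart, give the monitors a moment and confirm the skew is gone:

# Local chrony state plus the monitors' own view of time sync
chronyc tracking
ceph time-sync-status
ceph health detail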

b. HEALTH_WARN: mons are allowing insecure global_id reclaim

The relevant docs show that 14.2.20 fixed a security vulnerability in Ceph's authentication framework and introduced this warning; see CVE-2021-20288 for details. It can be disabled with the settings below; the docs recommend disabling insecure global_id reclaim only after the cluster has been upgraded to Octopus (or another patched release).

ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false
ceph config set mon auth_allow_insecure_global_id_reclaim false
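To confirm both settings landed:

# Both should print "false"
ceph config get mon mon_warn_on_insecure_global_id_reclaim_allowed
ceph config get mon auth_allow_insecure_global_id_reclaim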

2. Bucket and VIP verification

a. VIP connectivity

ping 10.10.0.250
curl http://10.10.0.250
<?xml version="1.0" encoding="UTF-8"?><ListAllMyBucketsResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>anonymous</ID><DisplayName></DisplayName></Owner><Buckets></Buckets></ListAllMyBucketsResult>

b. Install an S3 client

yum install s3cmd -y
cat << EOF > ~/.s3cfg
[default]
access_key = p9wq3vsnRg7xTSzoLs6L
access_token =
add_encoding_exts =
add_headers =
bucket_location = US
ca_certs_file =
cache_file =
check_ssl_certificate = True
check_ssl_hostname = True
cloudfront_host = oss.xuhandsome.org
connection_max_age = 5
connection_pooling = True
content_disposition =
content_type =
default_mime_type = binary/octet-stream
delay_updates = False
delete_after = False
delete_after_fetch = False
delete_removed = False
dry_run = False
enable_multipart = True
encrypt = False
expiry_date =
expiry_days =
expiry_prefix =
follow_symlinks = False
force = False
get_continue = False
gpg_command = /usr/bin/gpg
gpg_decrypt = %(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s
gpg_encrypt = %(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s
gpg_passphrase =
guess_mime_type = True
host_base = oss.xuhandsome.org
#host_bucket = %(bucket)s.s3.amazonaws.com
host_bucket = oss.xuhandsome.org/%(bucket)s
human_readable_sizes = False
invalidate_default_index_on_cf = False
invalidate_default_index_root_on_cf = True
invalidate_on_cf = False
kms_key =
limit = -1
limitrate = 0
list_md5 = False
log_target_prefix =
long_listing = False
max_delete = -1
mime_type =
multipart_chunk_size_mb = 15
multipart_copy_chunk_size_mb = 1024
multipart_max_chunks = 10000
preserve_attrs = True
progress_meter = True
proxy_host =
proxy_port = 0
public_url_use_https = False
put_continue = False
recursive = False
recv_chunk = 65536
reduced_redundancy = False
requester_pays = False
restore_days = 1
restore_priority = Standard
secret_key = 4w4I3bOX0bFZ6YqCWOoDhJ03eTHIAUMohf6wDZC6
send_chunk = 65536
server_side_encryption = False
signature_v2 = False
signurl_use_https = False
simpledb_host = sdb.amazonaws.com
skip_existing = False
socket_timeout = 300
ssl_client_cert_file =
ssl_client_key_file =
stats = False
stop_on_error = False
storage_class =
throttle_max = 100
upload_id =
urlencoding_mode = normal
use_http_expect = False
use_https = False
use_mime_magic = True
verbosity = WARNING
website_endpoint = http://%(bucket)s.s3-website-%(location)s.amazonaws.com/
website_error =
website_index = index.html
EOF

c. Create a bucket

s3cmd mb s3://test-bucket
Bucket 's3://test-bucket/' created
s3cmd ls
2022-07-17 10:16  s3://test-bucket

d. Upload test

# Upload the ceph-ansible deployment configuration files
s3cmd put --recursive /opt/ansible s3://test-bucket/
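A recursive listing confirms the objects were uploaded:

s3cmd ls --recursive s3://test-bucket/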

e. Download a file

s3cmd get s3://test-bucket/ceph-ansible/hosts
download: 's3://test-bucket/ceph-ansible/hosts' -> './hosts'  [1 of 1]
 448 of 448   100% in    0s    37.97 KB/s  done
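When testing is done, the bucket and its contents can be cleaned up (the recursive delete is destructive):

# Remove the test objects, then the now-empty bucket
s3cmd del --recursive s3://test-bucket/
s3cmd rb s3://test-bucket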
