site.yml

---
## Preparations
## Making sure python exists on all nodes, so Ansible will be able to run; make sure min vars are defined
- hosts: all
  gather_facts: no
  become: yes
  become_method: sudo
  pre_tasks:
  ## It would be best to have ansible already installed on all machines. 
  ## But if it is not, we'll try to do it:
  - name: when no python2, install python2 for Ansible<2.8 (usually required on ubuntu, which defaults to python3) # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3
    raw: test -e /usr/bin/python || (apt -y update && apt install -y python-minimal) || (yum install -y python2 python-simplejson)
    register: output
    changed_when: output.stdout != ""
    tags: always
    when:
    - ansible_version.full is version_compare('2.8', '<')
    - ( ansible_python_interpreter is not defined or ansible_python_interpreter == "/usr/bin/python" )
    # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required)
    ignore_errors: true
    ## reason for ignore_errors: true
    ## "version_compare" was replaced with "version" starting ansible 2.5;
    ## CentOS/RHEL 7.x use ansible 2.4, so not able to grasp what version_compare is.
    ## Ansible 2.9 removes the version_compare and does not recognize it any longer.
    ## As our need is to add python2 only on versions before 2.8, if this fails
    ## (due to missing version_compare command), we are fine.
    ## We do not cover cases where it fails due to other reasons, but that is a reasonable risk,
    ## and that issue will be captured later in the flow.

  - name: when no python(2/3), install python3(Debian) python2(RedHat) for Ansible>=2.8 # Alternativelly, for Ubuntu machines, define var: ansible_python_interpreter=/usr/bin/python3
    raw: test -e /usr/bin/python3 || (apt -y update && apt install -y python3-minimal) || (yum install -y python3 python-simplejson)
    register: output
    changed_when: output.stdout != ""
    tags: always
    when:
    - ansible_version.full is version('2.8', '>=') or ( ansible_python_interpreter is defined and ansible_python_interpreter == "/usr/bin/python3" )
    # ansible_os_family conds. cannot be used as this is before gathering facts (where ansible is required)
    ignore_errors: true
    ## reason for ignore_errors: true
    ## is similar to the one explained above (complements it)

  - name: Make sure proxy_env map is defined, even if not required; must be a map, e.g. empty map
    set_fact:
      proxy_env: '{{ proxy_env |default({}) }}'
      cacheable: yes
    tags: always
    when: proxy_env is not defined

  - setup: # aka gather_facts
    tags: always # required for tags, see ansible issue: #14228
    
  - name: test min. vars (group_vars/all) are set, like ClusterConfiguration (and k8s_network_addons_urls if netw is not via helm chart)
    debug: msg='Make sure min. vars (group_vars/all) are set, like ClusterConfiguration (and k8s_network_addons_urls if netw is not via helm chart)'
    when: 
    - ClusterConfiguration is not defined
    - JoinConfiguration is not defined
    failed_when: 
    - ClusterConfiguration is not defined
    - JoinConfiguration is not defined
    tags: always # always check if we have vars in place

## proper reset of any previous cluster (if any)
- hosts: primary-master
  become: yes
  become_method: sudo
  tags:
  - reset
  - master
  roles:
  #- { role: helm, task: helm_reset, tags: [ 'reset', 'helm_reset' ] } # in helm3 is no longer required
  - { role: storage, task: remove_pvs, tags: [ 'reset', 'storage_reset', 'pvs_reset' ] }
  - { role: storage, task: nfs_reset, tags: [ 'reset', 'storage_reset', 'nfs_reset' ] }
  - { role: storage, task: rook_reset, tags: [ 'reset', 'storage_reset', 'rook_reset' ] }
  - { role: tools, task: reset_drain, tags: [ 'reset', 'node_reset', 'drain', 'node_drain' ] } #done on master, affecting nodes

## nodes -> reset and install common part (for all nodes)
- hosts: nodes
  become: yes
  become_method: sudo
  tags:
  - node
  roles:
  - { role: tools, task: reset, tags: [ 'reset', 'node_reset' ], when: "inventory_hostname not in groups['masters']" }
  - { role: tools, task: weave_reset, tags: [ 'reset', 'node_reset', 'network_reset', 'weave_reset', 'weave' ], when: "inventory_hostname not in groups['masters']" }
  - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'node_install', 'node' ], when: "inventory_hostname not in groups['masters']" }

## master -> reset and install common part (for all masters - and sometimes etcd when colocated with masters)
- hosts: masters
  become: yes
  become_method: sudo
  tags:
  - master
  roles:
  - { role: tools, task: reset, tags: [ 'reset', 'master_reset' ] }
  - { role: tools, task: weave_reset, tags: [ 'reset', 'master_reset', 'network_reset', 'weave', 'weave_reset' ] }
  - { role: common, task: all, tags: [ 'common', 'install', 'common_install', 'master_install'] }

## master -> install keepalived on masters (relevat if HA)
- hosts: masters
  become: yes
  become_method: sudo
  any_errors_fatal: yes
  tags:
  - master
  - install
  - ha
  - master_install
  roles:
  - role: keepalived
    tags: [ 'master', 'install', 'master_install', 'ha', 'keepalived']
    when: 
    - ( groups['masters'] | length ) > 1 
    - ( custom.networking.masterha_type | default('vip') ) == 'vip'

- hosts: primary-master 
  name: primary-master (or master in general) - it applies to both ha and non-ha
  become: yes
  become_method: sudo
  any_errors_fatal: yes
  tags:
  - master
  - install
  - master_install
  - ha
  roles:
  - { role: primary-master, task: primary, tags: [ 'primary-master', 'master', 'install', 'master_install'] } 

- hosts: secondary-masters
  become: yes
  become_method: sudo
  any_errors_fatal: yes
  tags:
  - master
  - install
  - ha
  - master_install
  roles:
  - { role: non-primary-master, tags: [ 'secondary-masters', 'master', 'install', 'master_install', 'secondary_masters'] } 

## node -> install nodes (kubeadm join, etc)
- hosts: nodes
  become: yes
  become_method: sudo
  any_errors_fatal: yes
  tags:
  - node
  - install
  - node_install
  roles:
  - { role: non-primary-master, tags: [ 'node', 'install', 'node_install'], when: "inventory_hostname not in groups['masters']" }

## node -> label nodes (even when master is also a node)
- hosts: nodes
  become: yes
  become_method: sudo
  any_errors_fatal: yes
  tags:
  - node
  - install
  - node_install
  - label
  roles:
  - { role: tools, task: labels, tags: [ 'label'] }

## Post deploy (network, storage, taints, helm installation, helm charts deploy, any other addons)
- hosts: primary-master
  become: yes
  become_method: sudo
  tags:
  - post_deploy
  roles:
  - { role: tools, task: labels, tags: [ 'label'] }
  - { role: post_deploy, task: all, tags: [ 'post_deploy_no_helm' ] }
  - { role: storage, task: create_all, tags: [ 'storage', 'rook', 'nfs', 'vsphere' ] }
  - { role: helm, task: helm, tags: [ 'helm' ] }
  - { role: helm, task: charts_deploy, tags: [ 'helm', 'charts_deploy' ] }

### For fixes like vsphere's bug, we have to reboot after some more fixes...
#https://github.com/vmware/kubernetes/issues/495
- hosts: mustrebootlist
  gather_facts: no
  become: yes
  become_method: sudo
  tags:
  - mustrebootlist
  - vsphere_bug_fix
  - vsphere
  roles:
  - { role: tools, task: reboot, tags: [ 'reboot_minimal' ], when: "ClusterConfiguration.cloudProvider is defined and ClusterConfiguration.cloudProvider == 'vsphere' and allow_restart | default(False) and vsphere_bug_fix is defined and vsphere_bug_fix" }

## Generic Sanity
- hosts: masters
  become: yes
  become_method: sudo
  tags:
  - master
  pre_tasks:
  - name: remove temporary mustreboot temporary group
    group:
      name: mustrebootlist
      state: absent
  roles:
  - { role: tools, task: cluster_sanity, tags: [ 'cluster_sanity', 'sanity' ] }
  - { role: tools, task: postinstall_messages, tags: [ 'cluster_sanity', 'sanity' ] }

## to reset/add only some (more) nodes:
##   1. keep in hosts only:
##      - the master
##      - the affected node (all other nodes should not be there)
##   2. Have the token defined in the group_vars/all
##   3. Run using only this/these tag(s):
## ansible-playbook -i hosts -v site.yml --tags "node"   # same with: ansible-playbook -i hosts -v site.yml --tags "node_reset,node_install,cluster_sanity,cluster_info"

## To get cluster info/sanity:
## ansible-playbook -i hosts -v site.yml --tags "cluster_sanity,cluster_info"