-
Notifications
You must be signed in to change notification settings - Fork 33
Expand file tree
/
Copy pathvalues-usdfprod-prompt-processing.yaml
More file actions
135 lines (120 loc) · 4.63 KB
/
values-usdfprod-prompt-processing.yaml
File metadata and controls
135 lines (120 loc) · 4.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
prompt-keda:
  image:
    pullPolicy: IfNotPresent
    tag: 6.5.0
  worker:
    # TODO: need to adjust this once we know how leaky the LSSTCam pipeline is
    # restart: 7
    # Expect much less memory usage/leakage for Single-Frame only
    restart: 30
  instrument:
    pipelines:
      # IMPORTANT: don't use flow-style mappings (i.e., {}) in pipelines specs
      # if the result (including any comments) is longer than 72 characters.
      # The config will get corrupted after template substitution.
      # Block-style mappings can have lines of any length.
      main: |-
        - survey: BLOCK-365 # FBS SV Field Survey
          pipelines:
          # TODO: enable ApPipe only when we're ready (resources, alerts, etc.)
          - ${PROMPT_PROCESSING_DIR}/pipelines/LSSTCam/SingleFrame.yaml
          - ${PROMPT_PROCESSING_DIR}/pipelines/LSSTCam/Isr.yaml
        - survey: BLOCK-T427 # Daytime checkout
          pipelines: ['${PROMPT_PROCESSING_DIR}/pipelines/LSSTCam/Isr-cal.yaml']
        - survey: BLOCK-351 # Bright star test
          pipelines: ['${PROMPT_PROCESSING_DIR}/pipelines/LSSTCam/Isr-cal.yaml']
        - survey: BLOCK-T451 # Initial alignment
          pipelines: ['${PROMPT_PROCESSING_DIR}/pipelines/LSSTCam/Isr-cal.yaml']
        # Hexapod tests
        - survey: BLOCK-T413
          pipelines: ['${PROMPT_PROCESSING_DIR}/pipelines/LSSTCam/Isr-cal.yaml']
        - survey: BLOCK-T414
          pipelines: ['${PROMPT_PROCESSING_DIR}/pipelines/LSSTCam/Isr-cal.yaml']
        # Manual observations during SV blocks?
        - survey: BLOCK-T407
          pipelines: []
        - survey: BLOCK-T364
          pipelines: []
        - survey: BLOCK-T365
          pipelines: []
        - survey: BLOCK-T366
          pipelines: []
        - survey: BLOCK-T367
          pipelines: []
        - survey: BLOCK-T368
          pipelines: []
        - survey: BLOCK-T369
          pipelines: []
        - survey: BLOCK-T373
          pipelines: []
        - survey: BLOCK-T460
          pipelines: []
        - survey: BLOCK-T461
          pipelines: []
        # Produces nextVisits but not images
        - {survey: "BLOCK-T454", pipelines: []}
        # Miscellaneous scripts, not always images
        - {survey: "", pipelines: []}
        # Ignore unknown events
        - {pipelines: []}
      preprocessing: |-
        # TODO: run if and only if ApPipe is a possibility
        # - survey: BLOCK-365
        #   pipelines: ['${PROMPT_PROCESSING_DIR}/pipelines/LSSTCam/Preprocessing.yaml']
        - {survey: "", pipelines: []}
        # Don't preprocess anything unknown
        - {pipelines: []}
    # TODO: need to adjust this based on observed slew accuracy
    preloadPadding: 50
    calibRepo: s3://rubin-summit-users
  s3:
    imageBucket: rubin-summit
    endpointUrl: https://sdfembs3.sdf.slac.stanford.edu
  raw_microservice: http://172.24.5.158:8080/presence
  imageNotifications:
    kafkaClusterAddress: prompt-processing-2-kafka-bootstrap.kafka:9092
    topic: rubin-summit-notification
  # TODO: need to adjust this based on observed nextVisit lead time (depends on scheduler) and PP prep time
  # The shorter this is, the less capacity is wasted on canceled visits
  imageTimeout: 120
  apdb:
    config: s3://rubin-summit-users/apdb_config/cassandra/pp_apdb_lsstcam.yaml
  alerts:
    username: kafka-admin
    server: usdf-alert-stream-dev.lsst.cloud:9094
    topic: lsst-alerts
  # TODO: may need to override for debugging
  logLevel: timer.lsst.activator=DEBUG timer.lsst.daf.butler=DEBUG lsst.diaPipe=VERBOSE lsst.rbClassify=VERBOSE lsst.daf.butler=VERBOSE
  sasquatch:
    # TODO: production Sasquatch not yet ready
    endpointUrl: https://usdf-rsp-dev.slac.stanford.edu/sasquatch-rest-proxy
    namespace: lsst.prompt.prod
    auth_env: false
  keda:
    minReplicaCount: 3
    # TODO: this is scaled for SingleFrame, with 25% margin
    maxReplicaCount: 1100
    # TODO: may need to override for debugging
    # failedJobsHistoryLimit: 100
    # TODO: may need to reduce if we don't have enough capacity
    # redisStreams:
    #   expiration: 600
  initializer:
    # 6 retries is not enough time to fix, e.g., DB permissions problems
    retries: 15
    # Keep around for first few days so we can check in the morning PDT
    cleanup_delay: 21600 # 6 hours
    # Run the initializer job inside the embargo enclave.
    # NOTE(review): the source paste lost indentation and showed two adjacent,
    # identical podAnnotations stanzas; two sibling keys with the same name
    # would be a spec-invalid duplicate (silent last-wins on most parsers).
    # Reconstructed as one annotation set for the initializer and one at the
    # chart level — confirm against the upstream file.
    podAnnotations:
      edu.stanford.slac.sdf.project/usdf-embargo: "true"
  # Run worker/service pods inside the embargo enclave.
  podAnnotations:
    edu.stanford.slac.sdf.project/usdf-embargo: "true"
  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        # Soft preference for dedicated prompt-processing nodes; pods still
        # schedule elsewhere if none are available.
        - weight: 10
          preference:
            matchExpressions:
              - key: node-role.kubernetes.io/prompt-processing
                operator: Exists