-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata.js
More file actions
9 lines (9 loc) · 276 KB
/
data.js
File metadata and controls
9 lines (9 loc) · 276 KB
1
2
3
4
5
6
7
8
9
var CTFL_BLITZ=[{q:'Testing is a set of activities to discover defects and evaluate the quality of software artifacts.',a:'True'},{q:'Software testing only consists of executing tests (running the software and checking results).',a:'False'},{q:'Verification checks whether the system meets users\' and stakeholders\' needs in its operational environment.',a:'False'},{q:'Validation checks whether the system meets users\' needs in its operational environment.',a:'True'},{q:'Static testing involves executing the software under test.',a:'False'},{q:'Building confidence in the quality of the test object is one of the typical test objectives.',a:'True'},{q:'Debugging and testing are the same activity.',a:'False'},{q:'When static testing finds a defect, there is no need to reproduce or diagnose a failure.',a:'True'},{q:'Testing is a form of quality assurance (QA).',a:'False'},{q:'A root cause is a fundamental reason for the occurrence of a problem.',a:'True'},{q:'The first testing principle states that testing proves the absence of defects.',a:'False'},{q:'Exhaustive testing of all inputs and conditions is always feasible in practice.',a:'False'},{q:'Defects cluster together — a small number of components usually contain most defects.',a:'True'},{q:'If the same tests are repeated many times, they become increasingly effective at detecting new defects.',a:'False'},{q:'Testing is context dependent — there is no single universally applicable testing approach.',a:'True'},{q:'Test planning consists of defining test objectives and selecting an approach to achieve them.',a:'True'},{q:'Test completion activities occur only at the very start of a project.',a:'False'},{q:'Testware is created as output work products from test activities.',a:'True'},{q:'Traceability between the test basis and testware helps evaluate coverage.',a:'True'},{q:'The test management role is mainly focused on test analysis, test design, and test execution.',a:'False'},{q:'The whole team approach means every team member can perform any task and everyone is responsible for quality.',a:'True'},{q:'Independence of testing always replaces the need for developer familiarity with the code.',a:'False'},{q:'Good testers must have analytical thinking, critical thinking, and creativity.',a:'True'},{q:'Testers from outside the organisation represent the highest level of test independence.',a:'True'},{q:'Confirmation testing checks whether fixes resolved the problem that originally caused a failure.',a:'True'},{q:'Quality Control (QC) is a product-oriented, corrective approach focused on achieving appropriate quality levels.',a:'True'},{q:'Quality Assurance (QA) is a process-oriented, preventive approach focused on implementing and improving processes.',a:'True'},{q:'Test results are used by QA to fix defects directly in the product.',a:'False'},{q:'An error made by a human always directly produces a failure in the software.',a:'False'},{q:'Defects can be found in documentation such as requirements specifications, not only in source code.',a:'True'},{q:'Some defects will never result in a failure, even if the code containing them is executed.',a:'True'},{q:'Failures can be caused by environmental conditions such as electromagnetic fields, not only by code defects.',a:'True'},{q:'The \'absence-of-defects fallacy\' means that fixing all defects guarantees the software meets user needs.',a:'False'},{q:'Early testing saves money because defects removed early do not cause subsequent defects in later work products.',a:'True'},{q:'Test monitoring involves the ongoing checking of all test activities and comparison of actual progress against the plan.',a:'True'},{q:'Test analysis answers the question \'how to test?\' in terms of measurable coverage criteria.',a:'False'},{q:'Test design answers the question \'how to test?\' by elaborating test conditions into test cases.',a:'True'},{q:'Test implementation includes creating or acquiring the testware necessary for test execution.',a:'True'},{q:'A risk register is a list of risks together with their likelihood, impact, and mitigation information.',a:'True'},{q:'The testing role is mainly focused on test planning, test monitoring, and test completion.',a:'False'},{q:'One possible drawback of independent testing is that developers may lose a sense of responsibility for quality.',a:'True'},{q:'Co-location of team members facilitates communication and interaction in the whole team approach.',a:'True'},{q:'Testing is purely a technical activity and does not need to be planned, managed, or controlled.',a:'False'},{q:'The defect clustering principle is an illustration of the Pareto principle.',a:'True'},{q:'Traceability of test results to risks can be used to evaluate the level of residual risk in a test object.',a:'True'},{q:'The choice of SDLC model has no impact on the scope and timing of test activities.',a:'False'},{q:'In sequential development models, dynamic testing can easily be performed in the earliest phases.',a:'False'},{q:'In Agile projects, extensive test automation is favoured to make regression testing easier.',a:'True'},{q:'Test-Driven Development (TDD) means tests are written after the code is written.',a:'False'},{q:'In BDD, test cases are expressed using a simple form of natural language such as Given/When/Then.',a:'True'},{q:'ATDD derives tests from acceptance criteria as part of the system design process.',a:'True'},{q:'DevOps promotes team autonomy, fast feedback, integrated toolchains, and CI/CD practices.',a:'True'},{q:'The shift-left approach means testing is performed later in the SDLC than usual.',a:'False'},{q:'Retrospectives can only be held at the very end of a project, never during iterations.',a:'False'},{q:'Component testing (unit testing) focuses on testing components in isolation.',a:'True'},{q:'System integration testing focuses on testing the interfaces between the system and other systems or services.',a:'True'},{q:'Acceptance testing focuses on validation and demonstrating readiness for deployment.',a:'True'},{q:'Functional testing evaluates the \'how well the system behaves\' quality characteristics.',a:'False'},{q:'Non-functional testing covers characteristics such as performance efficiency, usability, and security.',a:'True'},{q:'Black-box testing derives tests from the system\'s internal structure and source code.',a:'False'},{q:'White-box testing is structure-based and depends on the internal design of the test object.',a:'True'},{q:'Confirmation testing confirms that an original defect has been successfully fixed.',a:'True'},{q:'Regression testing checks that a fix has not introduced new failures elsewhere in the system.',a:'True'},{q:'Regression test suites are rarely good candidates for automation.',a:'False'},{q:'Component integration testing focuses on the interfaces and interactions between components.',a:'True'},{q:'Maintenance testing can be triggered by modifications, upgrades, or retirement of a system.',a:'True'},{q:'The scope of maintenance testing does not depend on the degree of risk of the change.',a:'False'},{q:'In iterative SDLCs, both static and dynamic testing may be performed at all test levels each iteration.',a:'True'},{q:'System testing is always performed by the development team in their own environment.',a:'False'},{q:'Beta testing is one of the main forms of acceptance testing.',a:'True'},{q:'A SDLC model defines how different development phases and activities relate to each other logically and chronologically.',a:'True'},{q:'The waterfall model is an example of an iterative development model.',a:'False'},{q:'Scrum and Kanban are examples of Agile practices.',a:'True'},{q:'In TDD, tests are written first, then code is written to satisfy the tests, and then both are refactored.',a:'True'},{q:'DevOps requires a cultural shift to bridge the gaps between development and operations.',a:'True'},{q:'Continuous integration promotes a shift-left approach by encouraging developers to submit high-quality code with component tests.',a:'True'},{q:'A shift-left approach may result in extra training and cost earlier in the process but saves effort later.',a:'True'},{q:'Retrospectives are attended only by testers — developers and product owners do not participate.',a:'False'},{q:'A benefit of retrospectives is improved cooperation between development and testing.',a:'True'},{q:'Test levels are distinguished by attributes such as test object, test objectives, test basis, and approach.',a:'True'},{q:'Component integration testing is heavily dependent on integration strategy approaches like bottom-up or top-down.',a:'True'},{q:'Operational acceptance testing verifies that the system fulfils contractual and regulatory requirements.',a:'False'},{q:'Non-functional test types can be applied at every test level, not only at system testing.',a:'True'},{q:'Maintenance testing scope depends on the degree of risk, the size of the existing system, and the size of the change.',a:'True'},{q:'In DevOps, manual testing from the user\'s perspective is completely eliminated.',a:'False'},{q:'In static testing, the software under test must be executed.',a:'False'},{q:'Static analysis can be incorporated into CI frameworks.',a:'True'},{q:'Almost any work product — including requirements and source code — can be examined by static testing.',a:'True'},{q:'Third-party executable code is usually an ideal candidate for static testing reviews.',a:'False'},{q:'Static testing can detect defects that dynamic testing cannot, such as unreachable code.',a:'True'},{q:'Static testing cannot be used for validation, only for verification.',a:'False'},{q:'Early and frequent stakeholder feedback reduces the risk of building the wrong product.',a:'True'},{q:'The \'Planning\' phase of the review process defines scope, purpose, and exit criteria for the review.',a:'True'},{q:'The scribe (recorder) in a review is responsible for deciding what will be reviewed.',a:'False'},{q:'In an inspection, the author of the work product can act as the review leader.',a:'False'},{q:'A walkthrough is led by the author of the work product.',a:'True'},{q:'A technical review is performed by technically qualified reviewers and led by a moderator.',a:'True'},{q:'Informal reviews require a formal documented output.',a:'False'},{q:'Conducting reviews on small chunks helps reviewers maintain concentration.',a:'True'},{q:'Static testing always costs more overall than finding the same defects through dynamic testing.',a:'False'},{q:'Static testing finds defects directly, while dynamic testing causes failures that must then be analysed.',a:'True'},{q:'Static testing can be used to measure quality characteristics such as maintainability without executing code.',a:'True'},{q:'Spelling checkers and readability tools are examples of static analysis tools.',a:'True'},{q:'The \'Individual review\' activity involves each reviewer assessing the work product and logging anomalies.',a:'True'},{q:'Anomalies identified during a review are always confirmed defects requiring immediate fixing.',a:'False'},{q:'The review leader decides who will be involved in the review and organises when and where it takes place.',a:'True'},{q:'An inspection is the least formal type of review and does not require documented output.',a:'False'},{q:'A key success factor for reviews is providing adequate preparation time to all participants.',a:'True'},{q:'Evaluation of individual participants\' performance should be a key objective of every review.',a:'False'},{q:'Static analysis can identify buffer overflows and security vulnerabilities before dynamic testing begins.',a:'True'},{q:'Black-box test techniques are based on the test object\'s internal structure.',a:'False'},{q:'Experience-based test techniques can detect defects that black-box and white-box techniques may miss.',a:'True'},{q:'In Equivalence Partitioning (EP), one test per partition is sufficient.',a:'True'},{q:'Equivalence partitions must not overlap and must be non-empty sets.',a:'True'},{q:'A valid partition contains values that should be processed correctly by the test object.',a:'True'},{q:'To achieve 100% EP coverage, only valid partitions need to be exercised.',a:'False'},{q:'Boundary Value Analysis (BVA) can only be used for ordered partitions.',a:'True'},{q:'2-value BVA tests each boundary value and its closest neighbour in the adjacent partition.',a:'True'},{q:'3-value BVA is less rigorous than 2-value BVA.',a:'False'},{q:'Decision table testing is an effective way to record and test complex business rules.',a:'True'},{q:'In a decision table, \'N/A\' means the condition is irrelevant for the action outcome.',a:'False'},{q:'A state transition diagram shows possible states and valid transitions of a system.',a:'True'},{q:'Valid transitions coverage (0-switch coverage) is the most widely used state transition coverage criterion.',a:'True'},{q:'Achieving full all-transitions coverage also guarantees full all-states coverage.',a:'True'},{q:'In statement testing, the aim is to exercise every executable statement at least once.',a:'True'},{q:'100% statement coverage guarantees that all decision logic branches have been tested.',a:'False'},{q:'Branch coverage subsumes statement coverage.',a:'True'},{q:'White-box testing can detect defects of omission when requirements are missing from the specification.',a:'False'},{q:'Error guessing is based on the tester\'s knowledge of how the application has worked in the past.',a:'True'},{q:'Fault attacks are an unstructured, informal approach to error guessing.',a:'False'},{q:'In exploratory testing, tests are simultaneously designed, executed, and evaluated.',a:'True'},{q:'Session-based exploratory testing is conducted within a defined time-box using a test charter.',a:'True'},{q:'Checklists used in checklist-based testing should contain items that can be checked automatically.',a:'False'},{q:'Checklist-based testing can provide guidelines and consistency when detailed test cases are absent.',a:'True'},{q:'Collaboration-based test approaches focus on defect avoidance through collaboration and communication.',a:'True'},{q:'The three critical aspects of a user story are Card, Conversation, and Confirmation (3 Cs).',a:'True'},{q:'Good user stories should satisfy the INVEST criteria: Independent, Negotiable, Valuable, Estimable, Small, Testable.',a:'True'},{q:'Acceptance criteria define the scope of a user story and serve as a basis for acceptance testing.',a:'True'},{q:'In ATDD, test cases are created after the user story has been fully implemented.',a:'False'},{q:'The Given/When/Then format is an example of scenario-oriented acceptance criteria.',a:'True'},{q:'White-box test techniques are also known as specification-based techniques.',a:'False'},{q:'Each Choice coverage requires at least one test case for each partition from each set of partitions.',a:'True'},{q:'BVA focuses on boundary values because developers are more likely to make errors there.',a:'True'},{q:'In a limited-entry decision table, all condition values are shown as Boolean (true or false).',a:'True'},{q:'A decision table can be simplified by deleting columns that contain infeasible combinations of conditions.',a:'True'},{q:'In state transition testing, a single test case can cover only one transition between states.',a:'False'},{q:'All-states coverage is stronger than valid-transitions coverage.',a:'False'},{q:'A state table explicitly shows invalid transitions as empty cells.',a:'True'},{q:'Branch testing exercises both conditional and unconditional transfers of control.',a:'True'},{q:'White-box techniques are well-suited to reviewing code that is not yet ready for execution.',a:'True'},{q:'Exploratory testing is most effective when the tester has no domain knowledge.',a:'False'},{q:'Checklist items should be regularly updated based on defect analysis to stay effective.',a:'True'},{q:'In ATDD, the first step is a specification workshop where the user story and acceptance criteria are analysed by the team.',a:'True'},{q:'Rule-oriented acceptance criteria can take the form of a bullet-point verification list or input-output mapping table.',a:'True'},{q:'ATDD test cases, when automated, become executable requirements.',a:'True'},{q:'A test plan documents the means and schedule for achieving test objectives.',a:'True'},{q:'Entry criteria define what must be achieved in order to declare an activity completed.',a:'False'},{q:'In Agile, exit criteria are often called the \'Definition of Done\'.',a:'True'},{q:'The Wideband Delphi estimation technique uses a single expert to estimate test effort.',a:'False'},{q:'Three-point estimation uses optimistic, most likely, and pessimistic estimates to calculate effort.',a:'True'},{q:'Risk-based prioritisation executes test cases covering the most important risks first.',a:'True'},{q:'The test pyramid model shows that higher-layer tests are faster and more isolated than lower-layer tests.',a:'False'},{q:'Testing Quadrant Q1 contains component and component integration tests that should be automated.',a:'True'},{q:'Testing Quadrant Q3 contains exploratory, usability, and user acceptance tests that are often manual.',a:'True'},{q:'Risk level is determined by combining risk likelihood and risk impact.',a:'True'},{q:'Product risks are related to the management and control of the project, such as delays or cost overruns.',a:'False'},{q:'Project risks include people issues such as insufficient skills, conflicts, and shortage of staff.',a:'True'},{q:'Product risk analysis should ideally begin as early as possible in the SDLC.',a:'True'},{q:'Risk monitoring aims to ensure that mitigation actions are effective and to identify emerging risks.',a:'True'},{q:'Test control uses information from test monitoring to provide corrective actions and guidance.',a:'True'},{q:'Test progress reports are typically generated once at the end of the entire project.',a:'False'},{q:'A test completion report includes unmitigated risks and lessons learned relevant to the testing.',a:'True'},{q:'Configuration management ensures all configuration items are uniquely identified and version controlled.',a:'True'},{q:'A defect report should include the severity of the defect and steps to reproduce the failure.',a:'True'},{q:'All anomalies reported during testing automatically turn out to be real defects.',a:'False'},{q:'A test plan serves as a means of communication with team members and other stakeholders.',a:'True'},{q:'Entry criteria that a user story must fulfil before development begins are called the \'Definition of Ready\'.',a:'True'},{q:'Running out of time or budget can be a valid exit criterion if stakeholders accept the risk.',a:'True'},{q:'Estimation based on ratios uses figures from previous projects to derive standard ratios for similar work.',a:'True'},{q:'In Planning Poker, estimates are made using cards with numbers representing effort size.',a:'True'},{q:'Coverage-based prioritisation always executes test cases with the lowest coverage first.',a:'False'},{q:'Testing Quadrant Q4 contains smoke tests and non-functional tests and is technology-facing.',a:'True'},{q:'Product risk analysis results are used to determine the test techniques and the coverage to be achieved.',a:'True'},{q:'Configuration management allows reversion to a previous baseline to reproduce previous test results.',a:'True'},{q:'A defect report logged during static testing has the same objectives as one logged during dynamic testing.',a:'True'},{q:'Simply acquiring a test tool guarantees immediate and lasting benefits for the project.',a:'False'},{q:'Test automation can prevent simple human errors through greater consistency and repeatability.',a:'True'},{q:'One risk of test automation is an over-reliance on tools, ignoring the need for human critical thinking.',a:'True'},{q:'Non-functional testing tools allow testers to perform non-functional testing that is difficult to do manually.',a:'True'},{q:'Test automation always eliminates the need for any manual testing in a DevOps environment.',a:'False'},{q:'Management tools increase test process efficiency by facilitating management of requirements, tests, and defects.',a:'True'},{q:'Static testing tools support the tester in performing reviews and static analysis.',a:'True'},{q:'Test automation reduces test execution time, enabling earlier defect detection and faster feedback.',a:'True'},{q:'Using an open-source test tool carries no risks because the source code is freely available.',a:'False'},{q:'Choosing a test tool that does not comply with regulatory requirements is a potential risk of test automation.',a:'True'},{q:'DevOps tools support the DevOps delivery pipeline, workflow tracking, and automated build processes.',a:'True'},{q:'A spreadsheet used to record test results is considered a test tool in the context of testing.',a:'True'},{q:'Inaccurate estimation of the time and cost required to introduce a tool is a potential risk of test automation.',a:'True'},{q:'Test execution and coverage tools facilitate automated test execution and coverage measurement.',a:'True'},{q:'Once a test automation tool is selected and implemented, it requires no further maintenance.',a:'False'}];
var CTAI_BLITZ=[{q:'The term \'AI Effect\' refers to the changing perception of what constitutes AI as society and technology advance.',a:'True'},{q:'The modern definition of AI is the same today as it was in the 1950s.',a:'False'},{q:'Deep Blue\'s chess-playing system is now widely considered true artificial intelligence because it beat the world champion.',a:'False'},{q:'Narrow AI systems are designed to perform a specific task with limited context.',a:'True'},{q:'As of 2021, general AI systems with human-like cognitive abilities across all domains have been realized.',a:'False'},{q:'Super AI systems are expected to quickly become wiser than humans once they reach the technological singularity.',a:'True'},{q:'In a conventional system, it is relatively easy for humans to understand how inputs are transformed into outputs.',a:'True'},{q:'AI-based systems using machine learning use explicit if-then-else rules to determine behaviour.',a:'False'},{q:'Fuzzy logic and decision trees are both listed as AI technologies.',a:'True'},{q:'TensorFlow is an open-source ML framework provided by Google.',a:'True'},{q:'PyTorch is an open-source ML library operated by Microsoft.',a:'False'},{q:'GPUs typically outperform CPUs for ML tasks because they have thousands of cores suited to massively parallel processing.',a:'True'},{q:'General-purpose CPUs are the best hardware choice for training large ML models.',a:'False'},{q:'ASICs designed for AI are most suitable for edge computing where the trained model runs on individual devices.',a:'True'},{q:'AIaaS SLAs typically define ML functional performance metrics such as accuracy in the same way they define uptime.',a:'False'},{q:'AIaaS allows organisations to implement AI using cloud services even without sufficient in-house resources.',a:'True'},{q:'A pre-trained model can be embedded in an AI-based system without modification.',a:'True'},{q:'Transfer learning takes a pre-trained model and modifies it to perform a different requirement.',a:'True'},{q:'In transfer learning, the later layers of a neural network are typically reused unchanged while early layers are retrained.',a:'False'},{q:'A risk of pre-trained models is that inherited biases may not be apparent if documentation about training data is lacking.',a:'True'},{q:'Models created through transfer learning are unlikely to share the same vulnerabilities as the original pre-trained model.',a:'False'},{q:'The GDPR requires that every single prediction made by an AI system must be accurate.',a:'False'},{q:'ISO/IEC JTC 1/SC42, a subcommittee on AI, was set up in 2017.',a:'True'},{q:'ISO 26262 is a regulatory standard applicable to automotive systems.',a:'True'},{q:'Standards in isolation are mandatory for all organizations developing AI systems.',a:'False'},{q:'Keras is a high-level Python API capable of running on top of TensorFlow and CNTK.',a:'True'},{q:'Neuromorphic processors use the traditional von Neumann architecture.',a:'False'},{q:'A hybrid AIaaS approach combines AI functionality provided internally with some provided as an external service.',a:'True'},{q:'The ImageNet dataset contains over 14 million images classified into over 1000 categories.',a:'True'},{q:'Several of the risks of pre-trained models can be mitigated by thorough documentation of the model.',a:'True'},{q:'Flexibility is the ability of the system to be used in situations not part of the original requirements.',a:'True'},{q:'Adaptability and flexibility are identical concepts with no distinction in the CT-AI syllabus.',a:'False'},{q:'In the CT-AI syllabus, autonomy means the system works independently of human oversight for prolonged periods.',a:'True'},{q:'Fully autonomous systems are the desired goal for all AI-based systems.',a:'False'},{q:'Evolution in AI systems is the ability to improve in response to changing external constraints.',a:'True'},{q:'Sample bias occurs when the training data is not fully representative of the operational data space.',a:'True'},{q:'Algorithmic bias can be managed by hyperparameter tuning of the ML algorithm.',a:'True'},{q:'Inappropriate bias in AI systems has been reported in bank lending and recruitment systems.',a:'True'},{q:'Reward hacking occurs when the system achieves its goal through a solution that perverts the designer\'s intent.',a:'True'},{q:'Negative side effects occur when the system fails to achieve its specified goal.',a:'False'},{q:'Transparency in XAI refers to how easily users can determine how the system arrives at a result.',a:'False'},{q:'Explainability in XAI refers to the ease with which users can determine how the AI system arrives at a result.',a:'True'},{q:'Self-learning AI systems must be adaptable and flexible by nature.',a:'True'},{q:'Complexity and non-determinism are characteristics that make it harder to ensure AI systems are safe.',a:'True'},{q:'The OECD AI principles, issued in 2019, were the first international AI standards agreed by governments.',a:'True'},{q:'In supervised learning, the algorithm creates the model from labelled data.',a:'True'},{q:'Classification is used when the output is a continuous numeric value.',a:'False'},{q:'Regression in ML predicts a numeric output — this is the same usage as \'regression testing\' in ISTQB testing syllabi.',a:'False'},{q:'In unsupervised learning, the algorithm creates the model from unlabelled data.',a:'True'},{q:'Clustering groups data based on similarities without prior labels.',a:'True'},{q:'Association identifies interesting relationships or dependencies among data attributes.',a:'True'},{q:'Reinforcement learning uses labelled training data to learn correct behaviour.',a:'False'},{q:'Robotics and chatbots are typical applications of reinforcement learning.',a:'True'},{q:'The first step in the ML workflow is to train the model.',a:'False'},{q:'Model hyperparameters define the model structure, such as the number of layers in a neural network.',a:'True'},{q:'Algorithm hyperparameters control the training process, such as the number of epochs.',a:'True'},{q:'The ML workflow is strictly sequential and steps are never repeated in practice.',a:'False'},{q:'Overfitting produces a model that performs well on training data but poorly on new data.',a:'True'},{q:'Underfitting occurs when the model is too complex and memorises noise in the training data.',a:'False'},{q:'If the output of a problem is numeric and continuous, regression is likely the appropriate ML approach.',a:'True'},{q:'Reinforcement learning is best suited to contexts involving interaction with an environment and decisions at multiple states.',a:'True'},{q:'There is always one optimal ML algorithm for a given problem.',a:'False'},{q:'After deployment, a model should be monitored and tuned to address concept drift.',a:'True'},{q:'The ML workflow diagram includes a \'Monitor and Tune the Model\' step after deployment.',a:'True'},{q:'A/B testing can be used to compare an updated model against the previous version during the monitor phase.',a:'True'},{q:'Data preparation uses an average of 43% of the ML workflow effort.',a:'True'},{q:'Model selection and building uses more effort than data preparation in a typical ML project.',a:'False'},{q:'Data augmentation increases the number of samples in a dataset and can help provide robustness against adversarial attacks.',a:'True'},{q:'Feature selection removes irrelevant features, which can reduce training time and prevent overfitting.',a:'True'},{q:'Feature extraction creates new informative features from existing ones, typically resulting in a smaller dataset.',a:'True'},{q:'EDA uses data analysis and visualisation to discover trends in the data.',a:'True'},{q:'Three datasets are logically required: training, validation, and test.',a:'True'},{q:'The test (holdout) dataset is used during model evaluation and tuning.',a:'False'},{q:'A typical training:validation:test split guideline ranges from 60:20:20 to 80:10:10.',a:'True'},{q:'K-fold cross validation is used when there is limited data available.',a:'True'},{q:'Wrong data means the captured data was incorrect, for example due to a faulty sensor.',a:'True'},{q:'Unbalanced data can result from inappropriate bias such as race or gender bias in data collection.',a:'True'},{q:'Duplicate data records can unduly influence the resultant ML model.',a:'True'},{q:'Poor data quality can result in a compromised model with security vulnerabilities.',a:'True'},{q:'Data labelling is a minor activity that uses around 5% of time on ML projects.',a:'False'},{q:'Annotation refers to labelling objects in images, for example by drawing rectangles around them.',a:'True'},{q:'In crowdsourced labelling, multiple annotators may label the same data to manage quality.',a:'True'},{q:'AI-Assisted labelling requires no human review of the results.',a:'False'},{q:'Mislabelled data can result from random errors, systemic errors, deliberate errors, or translation errors.',a:'True'},{q:'Supervised learning assumes all data in the dataset is always correctly labelled in practice.',a:'False'},{q:'A False Positive is when the model predicts positive but the actual value is negative.',a:'True'},{q:'A False Negative is when the model predicts negative but the actual value is positive.',a:'True'},{q:'Accuracy measures the percentage of all correct classifications.',a:'True'},{q:'Precision measures the proportion of actual positives that were predicted correctly.',a:'False'},{q:'Recall (sensitivity) measures the proportion of actual positives that were predicted correctly.',a:'True'},{q:'The F1-score is the arithmetic mean of precision and recall.',a:'False'},{q:'A high F1-score (close to 1) indicates that false data has little influence on the result.',a:'True'},{q:'The ROC curve plots true positive rate against false positive rate.',a:'True'},{q:'A higher AUC value means the model\'s predictions are better.',a:'True'},{q:'MSE is used for supervised regression models and a value closer to zero is better.',a:'True'},{q:'The silhouette coefficient of -1 means clusters are well-separated.',a:'False'},{q:'Intra-cluster metrics measure the similarity of data points within a cluster.',a:'True'},{q:'ML functional performance metrics also measure non-functional quality characteristics such as performance efficiency.',a:'False'},{q:'Accuracy is a poor metric when one class of data dominates the dataset.',a:'True'},{q:'Precision should be prioritised when the cost of false positives is high.',a:'True'},{q:'Recall should be prioritised when it is critical that no actual positives are missed.',a:'True'},{q:'F1-score is most useful when the dataset is perfectly balanced and precision and recall are equally important.',a:'False'},{q:'ML benchmark suites provide objective comparisons between different AI technologies and hardware platforms.',a:'True'},{q:'ML functional performance metrics evaluate the entire system including the data pipeline.',a:'False'},{q:'AUC for the ROC curve is applicable to supervised classification problems.',a:'True'},{q:'Artificial neural networks were initially designed to mimic the functioning of the human brain.',a:'True'},{q:'A deep neural network has only an input layer and an output layer with no hidden layers.',a:'False'},{q:'The activation value of a neuron is calculated using an activation function, connection weights, and the neuron\'s bias.',a:'True'},{q:'During training, connection weights are adjusted to minimise the difference between actual and expected outputs.',a:'True'},{q:'Traditional white-box code coverage (e.g., statement coverage) is highly effective for testing neural networks.',a:'False'},{q:'Neuron coverage requires each neuron to achieve an activation value greater than zero.',a:'True'},{q:'Threshold coverage is stricter than basic neuron coverage as it requires activation values to exceed a specified threshold.',a:'True'},{q:'Sign-Change coverage requires test cases to cause each neuron to achieve both positive and negative activation values.',a:'True'},{q:'Sign-Sign coverage is conceptually similar to MC/DC coverage for imperative source code.',a:'True'},{q:'Neural network coverage is a mature research area with extensive objective evidence of effectiveness available since the 1970s.',a:'False'},{q:'AI-based system specifications are typically easier to define than conventional system specifications.',a:'False'},{q:'Two new specialised test levels for AI-based systems are input data testing and ML model testing.',a:'True'},{q:'The objective of input data testing is to ensure data used for training and prediction is of the highest quality.',a:'True'},{q:'ML model testing evaluates the model against agreed ML functional performance criteria.',a:'True'},{q:'Component integration testing for AI systems checks that inputs from the data pipeline are received correctly by the model.',a:'True'},{q:'During system testing, ML functional performance criteria do not need to be re-tested.',a:'False'},{q:'Where AI is provided as a service, API testing is normally performed as part of component integration testing.',a:'True'},{q:'Big data (high-volume, high-velocity, high-variety) can be difficult to create and manage as test data.',a:'True'},{q:'Automation bias means humans are always more accurate when assisted by AI recommendations.',a:'False'},{q:'The second form of automation bias is where the human misses a system failure by not adequately monitoring the system.',a:'True'},{q:'AI component documentation should include details on known bias, ethical issues, and concept drift.',a:'True'},{q:'Concept drift causes model outputs to become increasingly less accurate as the operational environment changes.',a:'True'},{q:'Systems prone to concept drift should only be tested once a year.',a:'False'},{q:'After retraining to address concept drift, the new model may be compared to the old one using A/B testing.',a:'True'},{q:'The test approach for an ML system is based on a risk analysis including both conventional and AI-specific testing.',a:'True'},{q:'Overfitting in a deployed model may be detected by testing with a dataset completely independent from the training data.',a:'True'},{q:'Acceptance testing for AIaaS may be needed to determine whether ML functional performance criteria are sufficiently met.',a:'True'},{q:'When testers use the same data pipeline implementation as data scientists, defects in that pipeline may be masked.',a:'True'},{q:'The probabilistic nature of many AI systems makes it necessary to specify tolerances in quality requirements.',a:'True'},{q:'Quality characteristics such as adaptability and autonomy are easy to define and test due to established standards.',a:'False'},{q:'A self-learning system may change its behaviour in ways that make previously passing tests fail.',a:'True'},{q:'Testing self-learning systems may require automated continuous testing because changes can happen faster than manual execution.',a:'True'},{q:'Testing autonomous systems requires creating conditions for the system to exercise its decision-making about human intervention.',a:'True'},{q:'Boundary value analysis applied to the operating environment can help generate conditions for autonomy testing.',a:'True'},{q:'The LIME method provides a definitive reason for every model output.',a:'False'},{q:'Transparency can be tested by comparing documented information on data and algorithm to the actual implementation.',a:'True'},{q:'Testing interpretability and explainability typically uses user surveys and questionnaires.',a:'True'},{q:'Probabilistic and non-deterministic AI systems always produce the same result for the same inputs.',a:'False'},{q:'When testing probabilistic systems, running a test several times helps generate a statistically valid result.',a:'True'},{q:'A test oracle is the source used to determine the expected result of a test.',a:'True'},{q:'The test oracle problem is straightforward to solve for complex, non-deterministic AI systems.',a:'False'},{q:'A/B testing, back-to-back testing, and metamorphic testing can all help alleviate the test oracle problem.',a:'True'},{q:'Human experts used as test oracles always agree with each other when presented with the same information.',a:'False'},{q:'Testing for sample bias may include reviewing the source and pre-processing of training data.',a:'True'},{q:'ML systems can create unwanted bias using combinations of seemingly unrelated features.',a:'True'},{q:'For the \'Evolution\' quality characteristic, tests should check how well the system copes with concept drift.',a:'True'},{q:'For \'Reward Hacking\', independent tests should use the same success measure as the intelligent agent being tested.',a:'False'},{q:'Safety testing of AI systems may need to be carried out in a virtual test environment.',a:'True'},{q:'Repeating the same tests on a self-learning system may cause it to adapt, influencing its long-term behaviour.',a:'True'},{q:'The complexity of AI-based systems increases when multiple components each provide probabilistic results.',a:'True'},{q:'When an AI system is tested as a black box, the visible internal structure provides useful additional testing information.',a:'False'},{q:'Acceptance criteria for probabilistic systems can always be evaluated with precise, exact values.',a:'False'},{q:'Insufficient specification of the operational environment is a key challenge when testing self-learning systems.',a:'True'},{q:'For \'Freedom from inappropriate bias\', comparing test results with census data can check for bias on inferred variables.',a:'True'},{q:'Checking ethics of an AI system can involve using the EC Assessment List for Trustworthy AI as a checklist.',a:'True'},{q:'An adversarial attack involves subtly perturbing valid inputs to cause the model to produce incorrect predictions.',a:'True'},{q:'Adversarial examples were first noticed with image classifiers.',a:'False'},{q:'Adversarial examples are generally transferable between ML systems trained for the same task.',a:'True'},{q:'In a white-box adversarial attack, the attacker has full knowledge of the algorithm, settings, and parameters.',a:'True'},{q:'In a black-box adversarial attack, the attacker builds a duplicate model and uses white-box techniques on it.',a:'True'},{q:'Adversarial testing aims to identify vulnerabilities so preventative measures can be taken.',a:'True'},{q:'Data poisoning attacks only target the trained model, not the training data.',a:'False'},{q:'The Microsoft Tay chatbot is a well-known example of a data poisoning attack.',a:'True'},{q:'EDA can help detect data poisoning because poisoned data may appear as outliers.',a:'True'},{q:'Pairwise testing tests every possible combination of parameter values.',a:'False'},{q:'Pairwise testing significantly reduces the number of test cases needed while maintaining defect detection capability.',a:'True'},{q:'Back-to-back testing uses an alternative version of the system as a pseudo-oracle to compare outputs.',a:'True'},{q:'For pseudo-oracles to be effective, the pseudo-oracle and SUT should share as much code as possible.',a:'False'},{q:'A/B testing compares two variants of a system to determine which performs better.',a:'True'},{q:'A/B testing generates detailed test cases and provides guidance on how tests should be designed.',a:'False'},{q:'A/B testing can be used to test self-learning systems by comparing system characteristics before and after a change.',a:'True'},{q:'Metamorphic testing generates follow-up test cases based on a metamorphic relation applied to a source test case.',a:'True'},{q:'In metamorphic testing, the expected results of follow-up test cases are always identical to the source test case.',a:'False'},{q:'Research shows that three to six diverse metamorphic relations can reveal over 90% of detectable defects.',a:'True'},{q:'Metamorphic testing requires commercial tool support and is not yet applicable manually.',a:'False'},{q:'Exploratory testing is especially useful for AI-based systems due to poor specifications and test oracle problems.',a:'True'},{q:'A tour in exploratory testing is a set of strategies and goals organised around a special focus.',a:'True'},{q:'EDA involves interactive, hypothesis-driven exploration of data for patterns, trends, and outliers.',a:'True'},{q:'Adversarial testing is appropriate when mishandling adversarial examples could have significant impact.',a:'True'},{q:'Back-to-back testing compares two variants of the same system to choose the better one.',a:'False'},{q:'Virtual test environments allow dangerous scenarios to be tested without endangering people or the environment.',a:'True'},{q:'Virtual test environments can only be run one at a time, not in parallel.',a:'False'},{q:'Virtual test environments provide better observability because all digital parts of the environment can be monitored.',a:'True'},{q:'Big data requirements in AI test environments need careful planning to set up correctly.',a:'True'},{q:'AI-specific hardware such as AI-specific processors never needs to be included in the test environment.',a:'False'},{q:'AI can support defect triage by using NLP and clustering algorithms to categorise and identify duplicate defects.',a:'True'},{q:'AI-based test case generation always solves the test oracle problem automatically.',a:'False'},{q:'AI-based optimisation of regression test suites can reduce suite size by around 50% while still detecting most defects.',a:'True'},{q:'Defect prediction using ML is most effective when based on prior experience with a similar codebase or developers.',a:'True'},{q:'The best predictors for defect prediction have been found to be source code metrics such as lines of code.',a:'False'},{q:'Visual testing uses image recognition to interact with GUI objects through the same interface as an actual user.',a:'True'},{q:'AI-based GUI tools can check that the user interface works correctly across different browsers and platforms.',a:'True'},{q:'Specifying test oracles and challenging stakeholder assumptions are currently practical AI testing tasks.',a:'False'},{q:'AI can assist in optimising regression test suites by analysing previous test results and recent code changes.',a:'True'},{q:'A distinction should be drawn between narrow AI and general AI when discussing what AI can do for testing today.',a:'True'}];
var GENAI_BLITZ=[{q:'GenAI is a branch of AI that uses large pre-trained models to generate human-like output such as text, images, or code.',a:'True'},{q:'Symbolic AI uses neural networks to automatically learn features from large datasets.',a:'False'},{q:'LLMs are trained on very large textual datasets including books, articles, and websites.',a:'True'},{q:'Tokenization breaks text into smaller units called tokens, which can range from a character to a sub-word or word.',a:'True'},{q:'Embeddings are plain text representations of tokens stored in relational databases.',a:'False'},{q:'The transformer model predicts the next token in a sequence to generate coherent text.',a:'True'},{q:'LLMs always produce the same output when given identical input, making them fully deterministic.',a:'False'},{q:'A larger context window always reduces computational complexity.',a:'False'},{q:'Foundation LLMs are general-purpose models that typically require further adaptation for specific tasks.',a:'True'},{q:'Instruction-tuned LLMs are fine-tuned using datasets that pair prompts with expected responses.',a:'True'},{q:'Reasoning LLMs focus on logical inference, multi-step problem-solving, and chain-of-thought reasoning.',a:'True'},{q:'Multimodal LLMs can only process text input.',a:'False'},{q:'Vision-language models are a subset of multimodal LLMs that integrate visual and textual information.',a:'True'},{q:'Classical machine learning requires manual feature selection and model training.',a:'True'},{q:'GenAI models require an additional training phase before they can be applied to test tasks.',a:'False'},{q:'LLMs can support test tasks such as generating test cases, analyzing defect patterns, and producing synthetic test data.',a:'True'},{q:'Small language models (SLMs) have more parameters than LLMs.',a:'False'},{q:'Tokens with similar meanings have embeddings positioned closely together in high-dimensional space.',a:'True'},{q:'AI chatbots and LLM-powered applications are two ways testers can leverage GenAI.',a:'True'},{q:'Deep learning requires users to manually define all features before training.',a:'False'},{q:'A structured prompt for software testing typically includes six components: role, context, instruction, input data, constraints, and output format.',a:'True'},{q:'The \'role\' component of a prompt defines the expected output format of the LLM response.',a:'False'},{q:'Context in a prompt provides background information about the test object and specific functionality to be tested.',a:'True'},{q:'Constraints outline any restrictions or special considerations the LLM should adhere to.',a:'True'},{q:'Output format specifications have no effect on the shape of the LLM\'s response.',a:'False'},{q:'Prompt chaining breaks a complex task into a series of intermediate steps using multiple prompts.',a:'True'},{q:'In prompt chaining, the result of each step is checked before proceeding to the next step.',a:'True'},{q:'Zero-shot prompting provides one example to demonstrate the desired outcome.',a:'False'},{q:'Few-shot prompting provides more than one example to guide the model\'s response behavior.',a:'True'},{q:'Meta prompting leverages the AI\'s ability to generate or refine its own prompts.',a:'True'},{q:'Meta prompting is useful when the tester is unsure how to craft an effective prompt.',a:'True'},{q:'One-shot prompting provides exactly one example to the model.',a:'True'},{q:'A system prompt is visible and editable by the chatbot user in most interfaces.',a:'False'},{q:'The system prompt stays constant throughout an interaction session.',a:'True'},{q:'User prompts represent the actual input or question from the chatbot\'s user.',a:'True'},{q:'System prompts can contain role, context, and constraints components of a structured prompt.',a:'True'},{q:'GenAI can help identify ambiguities and inconsistencies in requirements during test analysis.',a:'True'},{q:'LLMs can suggest test techniques such as boundary value analysis or equivalence partitioning.',a:'True'},{q:'GenAI cannot perform coverage analysis because it does not understand requirements.',a:'False'},{q:'LLMs can prioritize test conditions based on risk likelihood and impact of failure.',a:'True'},{q:'GenAI can generate test cases for both functional and non-functional testing requirements.',a:'True'},{q:'AI-generated test data cannot preserve data privacy because it is based on real data.',a:'False'},{q:'GenAI can generate automated test scripts compatible with various test automation frameworks.',a:'True'},{q:'Prompt chaining is most suitable for repetitive tasks with a specific output format.',a:'False'},{q:'Few-shot prompting is particularly effective for tasks requiring a specific, constrained output pattern.',a:'True'},{q:'Meta prompting is useful for flexible, dynamic tasks where the tester needs to craft new prompts.',a:'True'},{q:'Multiple prompting techniques can be combined for a single use case.',a:'True'},{q:'GenAI can analyze code changes to identify high-risk areas for targeted regression testing.',a:'True'},{q:'Self-healing tests automatically adjust test scripts to handle minor UI or API changes.',a:'True'},{q:'GenAI cannot assist with test monitoring because it only generates text.',a:'False'},{q:'GenAI can help generate dynamic dashboards and natural language summaries of test metrics.',a:'True'},{q:'Accuracy measures the overall correctness of GenAI output against expert-written test cases.',a:'True'},{q:'Precision evaluates the ability to identify all relevant instances within a dataset.',a:'False'},{q:'Recall measures the model\'s ability to identify all relevant instances in a dataset.',a:'True'},{q:'Diversity ensures a wide range of inputs and scenarios are covered, avoiding repetition.',a:'True'},{q:'Execution Success Rate measures how many generated test scripts run without syntax errors.',a:'True'},{q:'Time Efficiency evaluates the time saved compared to manual test efforts.',a:'True'},{q:'Prompt evaluation metrics should be based on statistically relevant data due to GenAI\'s non-deterministic nature.',a:'True'},{q:'A/B testing of prompts involves creating multiple prompt versions and evaluating which produces better results.',a:'True'},{q:'Output analysis examines AI-generated output for inaccuracies against the test basis.',a:'True'},{q:'Iterative prompt modification starts with a finished prompt and is never changed again.',a:'False'},{q:'Shorter prompts always produce better results than longer prompts.',a:'False'},{q:'Sharing prompt libraries across the test team helps standardize techniques and maintain consistent quality.',a:'True'},{q:'GenAI can generate Gherkin-style test cases using few-shot prompting.',a:'True'},{q:'In keyword-driven automation, predefined keywords represent common test steps.',a:'True'},{q:'Test case prioritization by GenAI can consider risk, dependencies, and test objectives.',a:'True'},{q:'GenAI can help produce test completion reports highlighting successes and lessons learned.',a:'True'},{q:'GenAI for test monitoring can analyze trends and predict potential risks.',a:'True'},{q:'LLMs can generate test oracle outputs, i.e., expected results for test cases.',a:'True'},{q:'The quality of LLM output for test tasks is independent of the quality of the input provided.',a:'False'},{q:'Multimodal prompting can combine GUI wireframes and user stories to generate acceptance criteria.',a:'True'},{q:'Prompt chaining with human verification is useful for progressively refining acceptance criteria.',a:'True'},{q:'GenAI cannot help with API regression testing because APIs are not text-based.',a:'False'},{q:'LLMs can generate test scripts that are then updated or extended based on new requirements.',a:'True'},{q:'Relevance and contextual fit determines whether GenAI output is applicable for a given context.',a:'True'},{q:'GenAI chatbots are accessible only to technical stakeholders.',a:'False'},{q:'Prompt engineering is not required when using LLM-powered testing applications.',a:'False'},{q:'GenAI can classify test anomalies based on severity and priority.',a:'True'},{q:'A system prompt may define the LLM\'s tone, domain-specific rules, and operational parameters.',a:'True'},{q:'Hallucinations occur when an LLM generates output that appears factually incorrect or irrelevant.',a:'True'},{q:'Reasoning errors occur when LLMs correctly interpret all logical structures including conditional logic.',a:'False'},{q:'LLM biases come from the data on which the model was trained.',a:'True'},{q:'Hallucinations in software testing can manifest as fictitious test cases or non-functioning test scripts.',a:'True'},{q:'LLMs possess true logical reasoning capabilities identical to human reasoning.',a:'False'},{q:'Non-deterministic LLM behavior makes it easy to permanently fix hallucinations.',a:'False'},{q:'Cross-verification compares AI-generated output with existing documentation to detect hallucinations.',a:'True'},{q:'Domain expertise consultation can help validate the accuracy of AI-generated content.',a:'True'},{q:'Reasoning errors can be detected by evaluating the logical flow and coherence of AI-generated content.',a:'True'},{q:'Bias detection includes reviewing whether generated test data is fairly and accurately represented.',a:'True'},{q:'Providing complete context in a prompt is a mitigation technique for hallucinations.',a:'True'},{q:'Lowering the LLM temperature parameter reduces randomness and results in more consistent outputs.',a:'True'},{q:'Setting a random seed guarantees 100% reproducibility in all LLM implementations.',a:'False'},{q:'Dividing prompts into smaller segments using prompt chaining can help detect reasoning errors early.',a:'True'},{q:'Using clear, interpretable data formats helps the model focus on essential task aspects.',a:'True'},{q:'Data exfiltration is a security attack that attempts to extract confidential training data from an LLM.',a:'True'},{q:'Unintentional data exposure occurs when GenAI outputs accidentally reveal sensitive information.',a:'True'},{q:'GDPR explicitly restricts all applications of GenAI in testing environments.',a:'False'},{q:'Data minimization involves using only the necessary amount of non-sensitive data in AI testing.',a:'True'},{q:'Data anonymization replaces sensitive information with non-identifiable data.',a:'True'},{q:'Regular security audits and vulnerability assessments are recommended for GenAI systems.',a:'True'},{q:'Training and processing LLMs requires minimal computing resources.',a:'False'},{q:'Generating a single image with a powerful AI model can consume more energy than generating text.',a:'True'},{q:'The cumulative environmental impact of GenAI across millions of users is negligible.',a:'False'},{q:'ISO/IEC 42001:2023 specifies requirements for managing AI systems within an organization.',a:'True'},{q:'The EU AI Act classifies AI applications by risk level and mandates compliance in transparency.',a:'True'},{q:'The NIST AI Risk Management Framework focuses on fairness, transparency, and security.',a:'True'},{q:'Selecting an appropriate GenAI model for the specific task is a mitigation strategy for hallucinations.',a:'True'},{q:'Comparing results across multiple LLMs helps detect output errors.',a:'True'},{q:'Higher LLM temperature always improves the consistency of outputs.',a:'False'},{q:'An LLM-powered test infrastructure integrates an LLM to enhance automation, reasoning, and decision-making in testing.',a:'True'},{q:'The front-end of an LLM-powered test infrastructure is where testers input queries or commands.',a:'True'},{q:'Vector databases are used for semantic retrieval of related content using embeddings.',a:'True'},{q:'The back-end of an LLM test infrastructure only handles authentication and nothing else.',a:'False'},{q:'RAG enhances LLMs by incorporating additional data sources into the response generation process.',a:'True'},{q:'In RAG preprocessing, large documents are broken into smaller chunks for focused retrieval.',a:'True'},{q:'RAG retrieval is typically based on semantic similarity between embeddings of the prompt and chunks.',a:'True'},{q:'RAG allows LLMs to access enterprise data sources such as databases and documentation in real time.',a:'True'},{q:'LLM-powered agents can only handle conversational question-and-answer interactions.',a:'False'},{q:'Autonomous agents operate independently with minimal human intervention.',a:'True'},{q:'Semi-autonomous agents perform tasks with periodic human oversight.',a:'True'},{q:'Multi-agent architectures use coordinated effort among several specialized agents, known as orchestration.',a:'True'},{q:'LLM-powered agents are immune to hallucinations and reasoning errors.',a:'False'},{q:'Fine-tuning adapts a pre-trained model to perform specific tasks or tailor it to particular domains.',a:'True'},{q:'Fine-tuning can be applied to Small Language Models (SLMs) as well as LLMs.',a:'True'},{q:'Overfitting in fine-tuning means the model becomes too specialized and performs poorly on new data.',a:'True'},{q:'Fine-tuning always eliminates the need for high-quality training datasets.',a:'False'},{q:'LLMOps refers to practices and processes for deploying and maintaining LLMs in production environments.',a:'True'},{q:'Using an AI chatbot for testing requires no consideration of data privacy or security.',a:'False'},{q:'In-house development of GenAI test tools requires expertise in implementing LLM-powered infrastructure.',a:'True'},{q:'RAG and fine-tuning are mutually exclusive and cannot be used together.',a:'False'},{q:'The back-end enhances the LLM\'s raw output through post-processing before presenting it to the front-end.',a:'True'},{q:'Fine-tuning can enable an LLM to generate test cases in an organization-specific output format.',a:'True'},{q:'Opacity in fine-tuned models refers to the lack of transparency in the LLM\'s decision-making.',a:'True'},{q:'LLM-powered agents invoke predefined functions called \'tools\' to interact with external systems.',a:'True'},{q:'Shadow AI refers to the use of unapproved AI tools that can lead to security and compliance risks.',a:'True'},{q:'Vague intellectual property is not a risk associated with shadow AI.',a:'False'},{q:'Defining measurable test objectives is the first step in implementing a GenAI strategy in testing.',a:'True'},{q:'Data quality is not critical to the effectiveness of LLM-powered testing.',a:'False'},{q:'Recurring cost is a key criterion when selecting LLMs/SLMs for software test tasks.',a:'True'},{q:'The Discovery phase of GenAI adoption focuses on awareness and capability building.',a:'True'},{q:'In the Utilization and Iteration phase, organizations fully integrate GenAI into their test processes.',a:'True'},{q:'The three phases of GenAI adoption must always be completed sequentially across all use cases.',a:'False'},{q:'Testers working with GenAI must combine domain expertise with AI skills.',a:'True'},{q:'Data sanitization involves removing or masking sensitive information before sharing with LLMs.',a:'True'},{q:'Test managers in AI-enabled organizations only need to manage human testers.',a:'False'},{q:'Internal communities of practice help share prompt pattern libraries and lessons learned.',a:'True'},{q:'Model performance for targeted test tasks is a key criterion when selecting an LLM/SLM.',a:'True'},{q:'Fear of job displacement is not a concern during GenAI adoption in test organizations.',a:'False'},{q:'Testers evolve from test design specialists to AI-assisted test specialists who verify AI-generated testware.',a:'True'}];
const PACKS={"1":[{"q":"Match each type of AI technology (1-4) with its CORRECT description (A-D):\n\n1. Symbolic AI\n2. Classical machine learning\n3. Deep learning\n4. Generative AI\n\nA. Uses neural networks to automatically learn features from data.\nB. Uses rule-based systems to mimic human decision-making.\nC. Uses deep learning to create new data by learning from its training data.\nD. Uses a data-driven approach that requires feature selection.","opts":["1D, 2B, 3A, 4C","1D, 2C, 3B, 4A","1C, 2B, 3D, 4A","1B, 2D, 3A, 4C"],"correct":[3],"exp_count":1,"exp":"Considering:\n• Symbolic AI uses a rule-based system to mimic human decisionmaking, representing knowledge using symbols and logical rules (1B).\n• Classical machine learning uses a data-driven approach that requires data preparation, feature selection and model training (2D).\n• Deep learning uses neural networks (machine learning structures) to automatically learn features from data (3A).\n• Generative AI uses deep learning techniques to create new data by learning and mimicking patterns from its training data (4C).\n\nThus: a) Is not correct. b) Is not correct. c) Is not correct. d) Is correct."},{"q":"Consider the realm of Large Language Models (LLMs). Which of the following options BEST explains why context window limitations affect LLM's text processing capabilities?","opts":["Because context windows restrict temporal processing sequences, preventing LLMs from maintaining chronological consistency across extended text analysis.","Because context windows prevent cross-referencing capabilities, limiting LLMs' ability to connect information across different document sources simultaneously.","Because context windows force LLMs to discard earlier information, which may contain relevant details needed for understanding later content.","Because context windows constrain parsing granularity levels, restricting LLMs from adjusting between character-level and document-level analysis approaches."],"correct":[2],"exp_count":1,"exp":"c) Is correct. When text exceeds the context window size, the model cannot simultaneously consider all parts of the document. As the model processes new tokens, it must effectively \"forget\" or discard tokens that fall outside its context window boundary.\n\nWhy not the others?\n\na) Is not correct because context windows do not control temporal sequencing but limit the scope of text that can be simultaneously considered.\nb) Is not correct because context windows affect scope within the current input, not cross-document referencing capabilities.\nd) Is not correct because context windows do not determine parsing approaches but limit the amount of text that can be processed simultaneously."},{"q":"Which of the following statements BEST describes tokenization in processing text for LLMs?","opts":["Tokenization converts tokens into high-dimensional vectors to capture their meaning.","Tokenization creates the building blocks used to understand and generate text.","Tokenization generates contextually appropriate responses using neural networks.","Tokenization predicts the next token in a sequence based on learned relationships."],"correct":[1],"exp_count":1,"exp":"b) Is correct. Tokenization involves splitting text into smaller units (tokens) that represent the building blocks of natural language generation tasks and enable LLMs to understand and generate text. See also the definition of \"Tokenization\" in the syllabus (see \"Appendix D – Generative AI Specific Terms\").\n\nWhy not the others?\n\na) Is not correct. This describes embeddings, not tokenization.\nc) Is not correct. This describes the general function of LLMs, not tokenization.\nd) Is not correct. This refers to how LLMs generate text, not tokenization."},{"q":"In the context of software testing, which of the following statements (i-v) about foundation, instruction-tuned, and reasoning LLMs are CORRECT?\n\ni. Foundation LLMs excel at generating test cases from high-level requirements without structured input.\nii. Reasoning LLMs excel at creating test scripts that strictly follow predefined organizational templates.\niii. Instruction-tuned LLMs excel at autonomously prioritizing test execution based on realtime user feedback.\niv. Reasoning LLMs excel at synthesizing data from defect reports to detect trends and prioritize test efforts.\nv. Instruction-tuned LLMs excel at generating test cases that adhere to Gherkin language syntax.","opts":["i, ii, and iii","ii, iii, and iv","i, ii, and v","iv, and v"],"correct":[3],"exp_count":1,"exp":"Considering:\ni. Is not correct. While foundation LLMs can generate test cases, they do not inherently \"excel\" at this task without structured input. Generating test cases without structured input misrepresents their capabilities.\nii. Is not correct. Creating template-based test scripts requires executing explicit instructions, which is the role of instruction-tuned LLMs, not reasoning LLMs. Reasoning LLMs specialize in logical inference and problem-solving, not rigid template adherence.\niii. Is not correct. Instruction-tuned LLMs are designed to follow structured prompts, not make autonomous decisions. Prioritizing tests based on feedback requires reasoning, which is beyond their scope.\niv. Is correct. Reasoning LLMs are explicitly designed for synthesizing multiple data sources and performing logical inference, problemsolving, and decision-making. Detecting trends and prioritizing test efforts aligns with their role in contextual analysis and the description provided.\nv. Is correct. Instruction-tuned LLMs are specifically trained to follow instructions, including adhering to requested formats, styles, and syntax rules. Generating test cases conforming to Gherkin language syntax, is a task well-suited to their capabilities.\n\nThus: a) Is not correct. b) Is not correct. c) Is not correct. d) Is correct."},{"q":"Which of the following statements BEST describes the relation between multimodal LLMs and vision-language models?","opts":["Multimodal LLMs are a subset of vision-language models designed to handle diverse inputs.","Vision-language models are a subset of multimodal LLMs focusing on visual and textual data.","Vision-language models are unrelated to multimodal LLMs and focus only on the user interface.","Multimodal LLMs and vision-language models are interchangeable terms."],"correct":[1],"exp_count":1,"exp":"b) Is correct. Vision-language models specifically integrate visual and textual data, making them a subset of multimodal LLMs.\n\nWhy not the others?\n\na) Is not correct. Vision-language models are a subset of multimodal LLMs, not the reverse.\nc) Is not correct. Vision-language models are closely related to multimodal LLMs and focus on both visual and textual data.\nd) Is not correct. Multimodal LLMs and vision-language models have distinct scopes and are not interchangeable."},{"q":"Which TWO of the following options represent key capabilities of LLMs in test tasks?","opts":["Identifying ambiguities and inconsistencies in requirements.","Generating complete application code for deployment.","Automating the execution of all test scripts without human intervention.","Performing exploratory testing on software applications.","Creating diverse test data with various combinations and boundary values."],"correct":[0,4],"exp_count":2,"exp":"a) Is correct. LLMs can analyze and clarify requirements by identifying ambiguities and inconsistencies.\ne) Is correct. LLMs can generate diverse test data, including combinations and boundaries.\n\nWhy not the others?\n\nb) Is not correct. Generating complete application code is not a key capability of LLMs in test tasks.\nc) Is not correct. LLMs can support test automation by suggesting improvements to test scripts and identifying design patterns but do not directly execute test scripts or fully automate test scripts without human oversight.\nd) Is not correct. LLMs cannot perform manual exploratory testing because it is an intuitive and adaptive process that requires human creativity, experience, and decision-making."},{"q":"Which of the following statements BEST explains the difference between AI chatbots and LLM-powered testing applications in the context of software testing?","opts":["AI chatbots are more suited for specific test tasks, while LLM-powered testing applications focus on ad hoc interactions.","Both AI chatbots and LLM-powered testing applications are designed to perform identical tasks without any configuration differences.","LLM-powered testing applications rely on conversational prompts, while AI chatbots require integration into test tools and test processes.","AI chatbots offer conversational interfaces for ad hoc test tasks, while LLM-powered testing applications provide customized solutions for specific test tasks."],"correct":[3],"exp_count":1,"exp":"d) Is correct. AI chatbots provide conversational interfaces for ad hoc tasks, while LLM-powered testing applications deliver customized solutions for specific needs.\n\nWhy not the others?\n\na) Is not correct. AI chatbots are best suited for ad hoc interactions, not specific test tasks.\nb) Is not correct. AI chatbots and LLM-powered testing applications have distinct purposes and are not identical in functionality or configurability.\nc) Is not correct. LLM-powered testing applications focus on integration into test processes, not conversational prompts. AI chatbots do not require integration into test tools and into test processes because their primary function is to facilitate ad hoc interactions rather than performing specific test tasks."},{"q":"A tester is examining a structured prompt used to obtain LLM assistance for performance test analysis. One of the components of this prompt reads: \"Test reports from performance testing tools, system monitoring logs during peak usage periods, and application performance benchmarks from previous releases\". In which component of the six-part prompt structure would this description MOST LIKELY appear?","opts":["Context","Input data","Constraints","Output format"],"correct":[1],"exp_count":1,"exp":"b) Is correct. The description lists specific data sources (test reports, monitoring logs, performance benchmarks) that will be processed by the LLM to perform the analysis task.\n\nWhy not the others?\n\na) Is not correct. Context provides background information about the test environment or system being tested, not specific (input) data to be analyzed. While this mentions performance testing, it is listing actual data sources rather than providing background information.\nc) Is not correct. Constraints outline restrictions or special considerations for how the task should be performed. The description lists actual data sources rather than restrictions on the analysis approach.\nd) Is not correct. Output format specifies the expected structure and characteristics of the LLM's response. The description lists actual data sources, without any reference to how results should be presented."},{"q":"A tester wants an LLM to analyze a requirements specification for potential defects. In the structured prompt the tester is using, one line reads: \"The potential defects must be provided in a markdown table with the following columns: ID, requirement reference, defect type, description, severity\". In which component of the six-part prompt structure would this line MOST LIKELY appear?","opts":["Instructions","Constraints","Output format","Context"],"correct":[2],"exp_count":1,"exp":"c) Is correct. The given line matches the syllabus definition of output format specifications that guide how the LLM should format the output.\n\nWhy not the others?\n\na) Is not correct. The given line does not describe what task to perform, but rather how to present the results. Instructions tell the LLM what to do, while this line tells the LLM how to format the output.\nb) Is not correct. The given line is not imposing restrictions or limitations on the analysis process, but rather specifying the desired presentation format. Constraints might include aspects like \"exclude cosmetic issues\" whereas this line is about formatting the output.\nd) Is not correct. The given line provides no background information such as the context of the requirements specification. Instead, it is about formatting the output."},{"q":"Which of the following BEST differentiates prompt chaining, few-shot prompting, and meta prompting techniques?","opts":["Prompt chaining focuses on providing examples, few-shot prompting breaks tasks into subtasks, and meta prompting refines prompts manually.","Few-shot prompting provides guidance with examples, prompt chaining breaks tasks into multiple prompts, and meta prompting allows the model to iteratively refine its own prompts.","Meta prompting emphasizes breaking down tasks into steps, prompt chaining uses examples, and few-shot prompting focuses on manual optimization of prompts.","Prompt chaining provides guidance without examples, few-shot provides guidance with examples, and meta prompting relies on tester-defined prompts."],"correct":[1],"exp_count":1,"exp":"b) Is correct. Few-shot prompting provides guidance with examples, prompt chaining decomposes tasks into intermediate steps (multiple prompts), and meta prompting uses AI to refine its own prompts iteratively.\n\nWhy not the others?\n\na) Is not correct. Prompt chaining is actually defined by decomposing a task into sequential prompts, not by \"providing examples\". Few-shot prompting is the technique that supplies examples, not the one that \"breaks tasks into subtasks\". Meta prompting relies on the LLM to revise prompts by itself, not on the tester \"refining prompts manually\".\nc) Is not correct. Meta prompting's core purpose is automatic prompt refinement by the LLM, not \"breaking tasks into steps\". Prompt chaining is characterized by step-by-step decomposition, not by \"using examples\" as stated. Few-shot prompting is the method that supplies examples to the model and it is not centered on \"manual optimization of prompts\".\nd) Is not correct. Prompt chaining is defined by stepwise decomposition, not \"guidance without examples\". Meta prompting relies on the LLM to generate/refine prompts, not solely on tester-defined wording."},{"q":"What is the primary function of a system prompt in interactions with LLMs?","opts":["To provide a framework for the LLM behavior for the entire conversation.","To provide specific questions or instructions from the user to the LLM.","To adjust dynamically with each user interaction and set the conversation's context.","To include visible input from the user and set rules for the conversation."],"correct":[0],"exp_count":1,"exp":"a) Is correct. The system prompt stays constant throughout the interaction session and establishes the fundamental framework for how the LLM should respond.\n\nWhy not the others?\n\nb) Is not correct. This describes the function of a user prompt, not a system prompt.\nc) Is not correct. The system prompt does not adjust dynamically; it remains constant.\nd) Is not correct. The system prompt is hidden and does not include visible input from the user."},{"q":"You are tasked with applying the following test approach to a set of stable requirements for a new project: generate test conditions, prioritize them based on risk level, and identify potential coverage gaps. The requirements have already been thoroughly reviewed for defects. Which of the following sequences of steps (i-v) should you follow to effectively apply Generative AI to implement this test approach using a prompt chaining technique to implement this test approach?\n\ni. Submit the requirements to the LLM and prompt it to produce test conditions based on those requirements.\nii. Provide the test conditions to the LLM, ensuring it understands the context for prioritization, and prompt it to prioritize those test conditions accordingly.\niii. Provide the prioritized test conditions to the LLM and prompt it to analyze them to determine whether all aspects of the requirements are addressed in the test conditions.\niv. Submit the requirements to the LLM and prompt it to produce prioritized test conditions that address all aspects of the requirements.\nv. Submit the requirements to the LLM and prompt it to detect inconsistencies and ambiguities in those requirements.","opts":["i, ii, and iii","iv, and ii","i, iii, and v","v, and iv"],"correct":[0],"exp_count":1,"exp":"Considering:\ni. Is correct because it ensures the process begins with generating test conditions from the requirements which aligns with the syllabus description that test analysis with GenAI involves generating test conditions based on the test basis, for example on requirements.\nii. Is correct because it clarifies the need for acceptance criteria (test conditions) to improve LLM-generated outputs and follows the syllabus guidance that LLMs can prioritize test conditions based on risk level when provided with proper context.\niii. Is correct because it directs the LLM to perform coverage analysis, addressing all aspects of the requirements, which matches the syllabus statement that LLMs can perform coverage analysis to determine whether all aspects of the test basis are covered.\niv. Is not correct because relying on minimal input without guidance does not align with the prompt chaining technique or ensure effectiveness. This option attempts to do everything in one step rather than breaking it down.\nv. Is not correct because defect identification is not central to the test objective of generating prioritized test conditions and identifying coverage gaps. The scenario specifically states the requirements are stable and already thoroughly reviewed for defects (which typically include ambiguities and inconsistencies).\n\nThus: a) Is correct. b) Is not correct. c) Is not correct. d) Is not correct.","points":2},{"q":"Consider applying the few-shot structured prompting technique to generate Gherkin-style test cases (i.e., scenario-based) for the following user story and acceptance criterion:\n\n- User story: \"As a user, I want to reset my password so that I can regain access to my account if I forget it.\"\n- Acceptance criterion: \"When a user submits a registered email address then they receive a password reset email.\"\n\nYou can rely on predefined examples that include user stories, acceptance criteria, and Gherkin-style test cases. Your task is to create a prompt to guide the LLM in generating accurate test cases aligned with the acceptance criterion for the user story above. Which one of the following prompts is BEST suited to this task?","opts":["Prompt A — Role: Act as a test analyst. Context: You are testing password reset functionality. Instruction: Generate Gherkin-style test cases for the user story and acceptance criterion. using the following predefined examples as a guide: << predefined examples >>. Input Data: <<< user story >>> and <<< acceptance criterion >>>. Constraints: Rely on best practices to create test cases. Output Format: Generate test cases with expected results.","Prompt B — Role: Act as a test analyst specializing in Gherkin-style test cases. Context: You are testing password reset functionality. Instruction: Generate Gherkin-style test cases for the user story and acceptance criterion, using the following predefined examples as a guide: << predefined examples >>. Input Data: <<< user story >>> and <<< acceptance criterion >>>. Constraints: Use \"Given-When-Then\" syntax and ensure alignment with the acceptance criterion. Output Format: Respect the given Gherkin-style test case format.","Prompt C — Role: Act as a test analyst. Context: You are testing password reset functionality. Instruction: Generate Gherkin-style test cases for the user story and acceptance criterion. Rely on best practices to create test cases. Input Data: <<< user story >>> and <<< acceptance criterion >>>. Constraints: Use \"Given-When-Then\" syntax and ensure alignment with the acceptance criterion. Output Format: Respect the given Gherkin-style test case format.","Prompt D — Role: Act as a test analyst. Context: You are testing password reset functionality. Instruction: Generate at least two Gherkin-style test cases for the user story and acceptance criterion. Focus on edge cases. Input Data: <<< user story >>> and <<< acceptance criterion >>>. Constraints: Ensure all test cases follow \"Given-When-Then\" syntax. Output Format: Respect the given Gherkin-style test case format."],"correct":[1],"exp_count":1,"exp":"b) Is correct. Comprehensive and leverages predefined examples to guide the LLM.\n\nWhy not the others?\n\na) Is not correct. While it specifies using predefined examples, it does not explicitly require the use of the \"Given-When-Then\" syntax, which is crucial for Gherkin-style test cases. It also indicates reliance on vague best practices in defining the constraints and does not specifically ensure alignment with the acceptance criterion.\nc) Is not correct. Lacks emphasis on using predefined examples and indicates reliance on vague best practices in defining the instructions.\nd) Is not correct. Focuses on edge cases but neglects comprehensive coverage and the use of examples for guidance.","points":2},{"q":"You are tasked with applying structured prompting to analyze regression test results. Here is an initial draft of the prompt:\n\nRole: Act as a test analyst.\nContext: Analyze raw regression test results from a recent test execution cycle.\nInstruction: Identify discrepancies in the test results.\nInput Data: Use the attached file containing raw test results.\nConstraints: Use the known anomalies list for cross-checking.\nOutput Format: Provide a list of discrepancies using a table format.\n\nYou are asked to improve this prompt. Which of the following improvements would BEST align the prompt with structured prompt engineering best practices for comprehensive regression test report analysis?","opts":["Add a step to cluster similar issues and cross-check findings against the known anomalies list.","Specify that the role is a regression test analyst specializing in actionable insights.","Expand the instruction to include separating expected results and actual results, clustering issues, and highlighting discrepancies.","Include references to regression testing principles such as \"Given-When-Then\" in the constraints."],"correct":[2],"exp_count":1,"exp":"c) Is correct. Expands instructions to include all critical structured analysis steps. Separating expected results and actual results helps to pinpoint mismatches effectively, clustering issues facilitates better prioritization and reduces redundancy, and highlighting discrepancies helps to focus attention on the most important findings.\n\nWhy not the others?\n\na) Is not correct. Addresses clustering and cross-checking but misses other critical steps like separating test results.\nb) Is not correct. Improves role clarity but does not expand instructions or address structured steps.\nd) Is not correct. Introduces irrelevant constraints, misaligning the prompt with the task requirements.","points":2},{"q":"You are using an LLM to assist in preparing actionable test metrics from raw data. The metrics include test progress, defect trends, and coverage, which are graphically displayed and explained with text. Your goal is to improve the test process to ensure the generated metrics are accurate, actionable, and easily interpretable by stakeholders. Here is an initial draft of a prompt used to instruct the AI:\n\nRole: Act as a test manager.\nContext: You are provided with raw data from test tools.\nInstruction: Generate test progress metrics, defect trend metrics, and coverage metrics from the raw data.\nInput Data: Use the attached file containing raw test results.\nConstraints: Ensure that the output is concise and understandable.\nOutput Format: Display metrics on a dashboard.\n\nYou are asked to improve this prompt. Which of the following improvements would BEST enhance the LLM's ability to produce accurate and actionable metrics?","opts":["Specify that the role is a test manager focusing on actionable insights and decision support, ensuring comprehensive analysis of test data.","Add an instruction to include potential risks identified from the trends in the generated metrics, along with their impact assessment and priority levels.","Expand the output format to include a plain-language summary that interprets the metrics and outlines next steps for stakeholders.","Emphasize constraints that the output is also easily interpretable by stakeholders, using clear language and avoiding technical jargon throughout the response."],"correct":[2],"exp_count":1,"exp":"c) Is correct. Expanding the output format with a plain-language summary that interprets the metrics and outlines next steps directly supports stakeholder understanding and actionability (see also the syllabus in section 2.2.4: \"Enhanced test metrics visualization and reporting\").\n\nWhy not the others?\n\na) Is not correct. Clarifies the role but does not add any concrete instruction that improves accuracy, actionability, or interpretability of the delivered metrics.\nb) Is not correct. Adds a risk-analysis task that could distract the model from the core metrics and still offers no mechanism for making results clear to stakeholders.\nd) Is not correct. Merely re-states an existing constraint and gives no specific guidance on how the LLM should achieve stakeholder-level interpretability.","points":2},{"q":"Your goal is to create test cases for an AI-based system that suffers from the test oracle problem, preventing you from determining expected results. You can only count on a few existing test cases with known expected results. Through appropriate analysis, you have identified a set of well-defined transformation rules that specify how changes to inputs affect expected results. These rules can be applied to all existing test cases. You have decided to rely on Generative AI, providing a given LLM with the following information: the existing test cases with their inputs and expected results, a clear description of the transformation rules, and guidelines for generating additional test cases by precisely applying these rules to the relevant existing test cases. With the specified information, the chosen LLM can directly generate additional test cases in line with your expectations. Which of the following prompting techniques is BEST suited to achieve your goal in this scenario?","opts":["Few-shot prompting","Prompt chaining","Meta prompting","Zero-shot prompting"],"correct":[0],"exp_count":1,"exp":"a) Is correct. Few-shot prompting is ideal in this scenario because it allows you to provide some examples (the existing test cases with inputs and expected results) to guide the LLM. By illustrating how these transformation rules are applied to generate new test cases, few-shot prompting can help the LLM understand and replicate the process to generate additional test cases.\n\nWhy not the others?\n\nb) Is not correct. While prompt chaining could be used, it could unnecessarily complicate this very straightforward task.\nc) Is not correct. While meta-prompting could be used, it is not as direct and specific as few-shot prompting to address this very straightforward task.\nd) Is not correct. Zero-shot prompting would not be effective since it would not leverage existing test cases as examples.","points":2},{"q":"You are leveraging Generative AI to assist in testing an entertainment software application. The Generative AI model generates test cases for user interaction scenarios, test scripts for API interactions, and synthetic test data to address edge cases. To effectively evaluate the Generative AI model's performance and to refine prompts, which combination of metrics and actions BEST ensures comprehensive assessment and improvement?","opts":["Evaluate the diversity of test cases to ensure varied input scenarios and use test execution success rate to validate the functionality of generated API test scripts.","Apply accuracy and completeness metrics to validate test cases against entertainment software requirements and rely on time efficiency to compare AI-generated test scripts with manual test efforts.","Focus on precision to ensure generated test data meets entertainment software compliance standards, while contextual fit and test execution success rate assesses the alignment and usability of test scripts.","Prioritize relevance and contextual fit for all outputs to maintain consistency with entertainment software requirements and include diversity metrics to expand edge case coverage."],"correct":[0],"exp_count":1,"exp":"a) Is correct. Diversity ensures comprehensive coverage of edge cases, and test execution success rate evaluates the reliability of API test scripts (see the description and example for the \"diversity\" metric provided by the syllabus within the table in section 2.3.1).\n\nWhy not the others?\n\nb) Is not correct. While accuracy and completeness are important, relying primarily on time efficiency does not fully evaluate coverage or test execution reliability.\nc) Is not correct. Precision and contextual fit are valuable but do not address diversity or thoroughly evaluate test execution success for API test scripts.\nd) Is not correct. Relevance and contextual fit are crucial, but without considering metrics like test execution success rate or accuracy, critical aspects of evaluation are missed."},{"q":"Which of the following techniques for evaluating and iteratively refining prompts is BEST suited for determining why an LLM consistently generates test cases with wrong expected results that contradict the input requirements, thereby providing insights to optimize the prompt and prevent similar errors?","opts":["Output analysis","A/B testing of prompts","Adjusting prompt length and specificity","Integrating user feedback"],"correct":[0],"exp_count":1,"exp":"a) Is correct. Output analysis examines LLM outputs for inaccuracies or inconsistencies. By classifying the wrong expected results that contradict the requirements, it reveals why the prompt misled the LLM and provides concrete insights for prompt refinement.\n\nWhy not the others?\n\nb) Is not correct. A/B testing of prompts is better suited for comparing different prompt versions than for diagnosing the cause of wrong expected results.\nc) Is not correct. While adjusting the length and specificity of prompts can improve the quality of responses by adding or reducing context, it does not directly address the cause of wrong expected results.\nd) Is not correct. While using the insights gathered from testers about the usefulness and clarity of generated output can help refine prompts to better meet real-world testing needs, it does not directly address the cause of wrong expected results."},{"q":"What is a hallucination in the context of LLM outputs?","opts":["A logical error where the LLM fails to follow a multi-step reasoning process accurately.","A bias in the LLM output caused by the training data favoring certain perspectives.","A generation of irrelevant or factually incorrect output by the LLM for a given task.","A limitation of the LLM to understand non-English perspectives in test generation tasks."],"correct":[2],"exp_count":1,"exp":"c) Is correct. Hallucinations occur when the LLM generates output that is factually incorrect or irrelevant to a given task. See the syllabus in section 3.1.1.\n\nWhy not the others?\n\na) Is not correct. This describes reasoning errors, not hallucinations.\nb) Is not correct. This described biases in AI output, not hallucinations.\nd) Is not correct. This describes biases due to underrepresentation in training data, not hallucinations."},{"q":"You are using Generative AI to create test cases for an e-commerce (e-shop) application. The following features have been explicitly mentioned in the project briefing:\n\n• cart management\n• discount code application\n• order confirmation email generation\n\nBased on these details, which of the following AI-generated test cases MOST LIKELY represents a hallucination?","opts":["Verify that a user can add multiple items to their cart and proceed to checkout.","Verify that a user cannot apply an expired discount code during checkout.","Verify that a user receives a confirmation email after successfully placing an order.","Verify that a user can create a wishlist to save favorite items for later."],"correct":[3],"exp_count":1,"exp":"d) Is correct. Wishlist management is not mentioned in the project briefing, making this the most likely hallucinated test case.\n\nWhy not the others?\n\na) Is not correct. Cart management is explicitly mentioned in the project briefing, making this test case relevant.\nb) Is not correct. Discount code application is explicitly mentioned in the project briefing, making this test case relevant.\nc) Is not correct. Order confirmation emails are explicitly mentioned in the project briefing, so this test case is valid.","points":2},{"q":"Which of the following options refers to a benefit that is MOST directly associated with using clear and structured input data formats when working with LLMs for test tasks?","opts":["Helps reduce the effort to fine-tune the LLMs for test tasks.","Helps LLMs generate less ambiguous outputs for test tasks.","Helps LLMs generate more context-relevant outputs for test tasks.","Helps LLMs generate more creative outputs for test tasks."],"correct":[1],"exp_count":1,"exp":"b) Is correct. When input data is presented in a clear and structured way, potential misunderstandings are minimized. Ambiguity often arises from unclear and poorly structured data.\n\nWhy not the others?\n\na) Is not correct. The effort to fine-tune LLMs for test tasks is associated with further training of these models on a targeted dataset, enabling them to learn the domain-specific knowledge and nuances needed to perform those test tasks. The use of clear and structured input data formats does not affect this effort.\nc) Is not correct. Context relevance is more dependent on providing the right contextual information rather than just using clear and structured input data formats.\nd) Is not correct. The use of clear and structured input data formats does not increase the creativity of LLMs in generating outputs. In particular, the use of these formats does not encourage novel responses, as it requires LLMs to generate responses that adhere to the specified formats."},{"q":"Which strategy can help reduce variability in LLM outputs by narrowing the probability distribution during inference?","opts":["Increasing the learning rate.","Lowering the temperature setting.","Increasing the random seed.","Lowering the random seed."],"correct":[1],"exp_count":1,"exp":"b) Is correct. Temperature is a parameter that controls the randomness of the output by acting on the probability distribution during inference. Lowering the temperature reduces randomness, leading to more consistent outputs.\n\nWhy not the others?\n\na) Is not correct. Learning rate is a setting (chosen before training begins) that controls the learning process of neural networks. This setting determines how much to change the model's weights during training. It is not related to inference variability. Increasing it can lead to faster convergence during training but does not affect the variability of outputs during inference.\nc) and d) are not correct. While setting a random seed can improve reproducibility, it does not in itself narrow the probability distribution during inference. Moreover, whether its value is high or low is irrelevant."},{"q":"Which of the following statements about data privacy concerns related to using Generative AI for software testing is INCORRECT?","opts":["Generative AI can unintentionally expose sensitive data through its outputs.","Generative AI tools may store and process sensitive data without explicit user consent, leading to misuse.","Using Generative AI tools without adhering to data protection regulations, such as General Data Protection Regulation (GDPR), can lead to legal disputes.","An LLM is likely to expose real sensitive data if it hallucinates while generating synthetic test data, regardless of the data it was trained on."],"correct":[3],"exp_count":1,"exp":"d) Is correct. An LLM does not expose real sensitive data if it hallucinates when generating synthetic test data, as long as it was not trained on real sensitive data. In this case hallucinations would be purely synthetic and based on patterns and structures the model learned during training. The LLM could unintentionally generate synthetic test data that matches real sensitive data (and this is definitely a concern) but it would not be exposing real sensitive data. However, this unintentional generation is very unlikely.\n\nWhy not the others?\n\na) Is not correct. This is a privacy concern, as \"Unintentional data exposure\" relates to generative AI (GenAI) outputs revealing sensitive information accidentally.\nb) Is not correct. This is a privacy concern, as it involves the lack of control over how sensitive data is stored or processed.\nc) Is not correct. This is a privacy concern related to non-compliance with regulations such as the General Data Protection Regulation (GDPR), leading to legal risks."},{"q":"An attacker injects falsified test results into the training dataset of an LLM intended to recommend optimal test coverage strategies. What type of attack vector does this description BEST refer to?","opts":["Malicious code generation","Context Manipulation","Request manipulation","Data poisoning"],"correct":[3],"exp_count":1,"exp":"d) Is correct. According to the syllabus in section 3.2.2, data poisoning involves \"manipulating training data\" with the specific example of \"providing fake evaluations when rating the results of an AI-generated test report\". The question describes this attack vector: an attacker is injecting falsified test (execution) results into the training dataset, which directly manipulates the training data to compromise the LLM's ability to accurately recommend optimal test coverage strategies.\n\nWhy not the others?\n\na) Is not correct. Malicious code generation involves \"manipulating an LLM to generate backdoors (e.g., external command calls) during use\", not injecting false data into training datasets.\nb) Is not correct. Context Manipulation involves \"sending requests designed to extract confidential training data\", not injecting false data into training datasets.\nc) Is not correct. Request manipulation involves \"introducing data that disrupts the AI's output\" during runtime use, not injecting false data into training datasets."},{"q":"Match each type of attack vector against an LLM (1-4) with the corresponding example (A-D):\n\n1. Context Manipulation\n2. Request manipulation\n3. Data poisoning\n4. Malicious code generation\n\nA. An attacker maliciously modifies the data associated with traceability links between requirements and test cases into the dataset used for fine-tuning an LLM, compromising its accuracy in generating test cases from requirements.\nB. An attacker maliciously crafts and provides deceptive prompts that induce an LLM, fine-tuned to assist testers in automated test script generation, to produce vulnerable test scripts with hidden security flaws.\nC. An attacker maliciously provides large specially crafted prompts that induce an LLM, fine-tuned to assist testers in generating test cases, to accidentally reveal confidential API keys inherited from past test projects.\nD. An attacker maliciously submits carefully modified reference screenshots into a visual testing framework that uses an LLM for comparative visual analysis, to trick the LLM into systematically ignoring genuine UI issues during regression testing.","opts":["1C, 2D, 3A, 4B","1B, 2D, 3A, 4C","1D, 2C, 3B, 4A","1C, 2B, 3D, 4A"],"correct":[0],"exp_count":1,"exp":"Considering:\n• Context Manipulation is based on sending requests designed to extract confidential training data. (1C)\n• Request manipulation is based on introducing an image that disrupts the LLM output. (2D)\n• Data poisoning is based on manipulating recommendations through fake evaluations used in training. (3A)\n• Malicious code generation is based on manipulating a LLM to generate backdoors (e.g., external command calls) during use. (4B)\n\nThus: a) Is correct. b) Is not correct. c) Is not correct. d) Is not correct."},{"q":"Which of the following strategies BEST addresses data privacy risks in the context of Generative AI-powered software testing?","opts":["Using multiple LLMs to evaluate and compare test results for improved accuracy.","Replacing sensitive test data with an anonymized version of the same.","Allowing unrestricted access to sensitive test data to improve Generative AI model training.","Disabling encryption of sensitive test data to streamline data storage and transmission processes."],"correct":[1],"exp_count":1,"exp":"b) Is correct. Anonymization is an effective strategy to mitigate data privacy risks.\n\nWhy not the others?\n\na) Is not correct. While evaluating outputs by comparing multiple LLMs is a useful practice, in this case it focuses on output quality in terms of accuracy without addressing data privacy risks.\nc) Is not correct. Unrestricted access to sensitive data increases the risk of sensitive data breach.\nd) Is not correct. Disabling encryption of sensitive data weakens security and increases the risk of sensitive data theft."},{"q":"Which of the following options about the impact of LLM usage on energy consumption and CO₂ emission is CORRECT?","opts":["Image generation tasks consume substantially more energy than text generation tasks, but produce fewer CO₂ emissions.","Generative AI-powered searches consume significantly less energy than traditional web searches due to their optimized algorithms.","Image generation tasks consume substantially more energy than text generation tasks due to their higher computational complexity.","Text generation tasks consume very little energy, allowing them to be performed by millions of users without significant energy consumption."],"correct":[2],"exp_count":1,"exp":"c) Is correct. Image generation requires significantly more computational resources than text generation, making it much more energy intensive.\n\nWhy not the others?\n\na) Is not correct. Image generation is by far much more energy- and CO₂ intensive than text generation. Energy consumption and CO₂ emissions are typically correlated unless a key factor (like energy source differences) is specified.\nb) Is not correct. On the contrary, GenAI-powered searches consume considerably more energy than traditional web searches.\nd) Is not correct. The cumulative impact of text generation across millions of users is not negligible."},{"q":"Which TWO of the following standards, or parts of them, are MOST relevant to the use of Generative AI in software testing?","opts":["ISO/IEC 25010:2023","ISO/IEC 23053:2022","ISO/IEC/IEEE 29119-2:2021","ISO/IEC 42001:2023","ISO/IEC/IEEE 29119-3:2021"],"correct":[1,3],"exp_count":2,"exp":"b) Is correct. ISO/IEC 23053:2022 is a standard (mentioned in section 3.4.1 of the syllabus) that provides a framework for data quality, transparency, and fault tolerance when using GenAI for testing.\nd) Is correct. ISO/IEC 42001:2023 is a standard (mentioned in section 3.4.1 of the syllabus) that specifies requirements for managing AI-based systems within an organization, providing best practices for GenAI in software testing and promoting consistency and reliability.\n\nWhy not the others?\n\na) Is not correct. ISO/IEC 25010:2023 is a standard (not mentioned in the syllabus, but mentioned in the CTFL syllabus) that defines a product quality model that relates to quality properties of ICT/software products. It does not address the use of GenAI in software testing.\nc) Is not correct. ISO/IEC/IEEE 29119-2:2021 is a part (volume) of the standard (not mentioned in the syllabus, but mentioned in the CTFL syllabus) that deals with test processes. It does not address the use of GenAI in software testing.\ne) Is not correct. ISO/IEC/IEEE 29119-3:2021 is a part (volume) of the standard (not mentioned in the syllabus, but mentioned in the CTFL syllabus) that deals with test documentation. It does not address the use of GenAI in software testing."},{"q":"Which of the following components of an LLM-powered testing application is responsible for combining user input with structured and semantically similar data to prepare a prompt for the LLM?","opts":["Back-end","Front-end","Authentication component","Post-processing component"],"correct":[0],"exp_count":1,"exp":"a) Is correct. The back-end is responsible for retrieving data from relational and vector databases, combining it with user input, and preparing the prompt tailored for the LLM.\n\nWhy not the others?\n\nb) Is not correct. The front-end serves as the user interface for submitting input, but it does not prepare the prompt for the LLM.\nc) Is not correct. The authentication component ensures secure access but is unrelated to data retrieval or prompt preparation.\nd) Is not correct. The post-processing component refines the output generated by the LLM but does not handle prompt preparation."},{"q":"You are a tester working on a banking application that includes features such as user login, account management, and secure transactions. The system documentation, including API specifications and security requirements, is stored in a vector database, while historical test cases are stored in a relational database. Your task is to generate test cases using a Retrieval-Augmented Generation (RAG) framework to ensure alignment with the latest specifications and requirements. Which of the following options represents the MOST appropriate use of the RAG framework in this scenario?","opts":["Submit a query specifying one function to be tested. The RAG framework will retrieve relevant specifications and requirements from the vector database, combine them with historical test cases, and automatically generate accurate and context-aware test cases through the LLM.","Submit a query specifying all functions to be tested. The RAG framework will retrieve relevant specifications and requirements from the vector database, combine them with historical test cases, and automatically generate accurate and context-aware test cases through the LLM.","Use the RAG framework to retrieve historical test cases from the relational database and security requirements from the vector database. Manually review the retrieved information before refining the query for the LLM to generate targeted test cases.","Rely on the LLM's internal training data to generate test cases while using the RAG framework or reference retrieval without directly integrating retrieved information into the generation process."],"correct":[0],"exp_count":1,"exp":"a) Is correct. RAG automatically retrieves the relevant specifications and historical test-case chunks and feeds them to the LLM, which then generates accurate, context-aware test cases.\n\nWhy not the others?\n\nb) Is not correct. RAG is better used when asked to retrieve specific targeted information from the vector database and not the whole dataset.\nc) Is not correct. Manually reviewing and refining the query adds unnecessary effort, which contradicts RAG's design to automate retrieval and use retrieved data dynamically in the LLM's response generation.\nd) Is not correct. Relying on the LLM's internal data ignores RAG's core capability of enhancing responses by integrating external, up-to-date information."},{"q":"Which of the following options BEST describes the enhancements that autonomous and semi-autonomous LLM-powered agents bring to automating test processes?","opts":["They can enhance both efficiency and quality in automating test processes through a balanced use of single-agent and multi-agent systems.","They can enhance quality in automating test processes by adding complex verification checks although at the expense of efficiency.","They can enhance both efficiency and quality in automating test processes by leveraging their ability to operate with varying levels of human interaction.","They can enhance both efficiency and quality in automating test processes while eliminating the need for verification within these test processes."],"correct":[2],"exp_count":1,"exp":"c) Is correct. Autonomous agents increase efficiency by executing test tasks within the test processes with minimal human intervention and by using automated checks designed to be efficient. Semi-autonomous agents involve strategic human oversight, which ensures the quality of test results. These two types of agents allow implementing a balanced approach to achieve both efficiency and quality by leveraging their ability to operate with varying levels of human interaction.\n\nWhy not the others?\n\na) Is not correct. Autonomous and semi-autonomous agents can be implemented as single-agent and/or multi-agent systems to improve both efficiency and quality in test processes. However, the key enhancements that these agents bring refer to increased efficiency and quality in test processes due to their ability to balance autonomy with human oversight.\nb) Is not correct. While autonomous and semi-autonomous agents incorporate verifications to ensure quality, these verifications are also designed to be efficient. The use of automated checks actually aims to streamline the test processes and enhance quality without sacrificing efficiency (and vice versa) which, in turn, is also enhanced.\nd) Is not correct. The complete elimination of verification is neither realistic nor desirable. Verification remains crucial even when using advanced technologies like autonomous and semi-autonomous LLM-powered agents."},{"q":"Which of the following statements about fine-tuning language models for specific test tasks is INCORRECT?","opts":["Fine-tuning involves training a pre-trained model on task-specific data to enhance its performance and domain knowledge.","Fine-tuning equips a language model with new capabilities by replacing its general knowledge with task-specific reasoning, ensuring the absence of overfitting.","Fine-tuning modifies a pre-trained model's parameters using a targeted dataset to adapt it for a specific domain or task.","Fine-tuning requires high-quality, task-specific datasets to avoid biased or inaccurate results."],"correct":[1],"exp_count":1,"exp":"b) Is correct. This statement is incorrect because fine-tuning does not replace the model's general knowledge, and overfitting remains a potential challenge.\n\nWhy not the others?\n\na) Is not correct. This is a correct description of fine-tuning, which enhances a model's performance in a specific domain through task-specific training.\nc) Is not correct. This is a correct description, as fine-tuning involves parameter adjustment using targeted datasets.\nd) Is not correct. This is a correct description of the challenges associated with fine-tuning, as high-quality datasets are crucial for effective adaptation."},{"q":"Which of the following BEST describes the primary focus of Large Language Model Operations (LLMOps) when deploying and managing LLMs for test tasks?","opts":["Preventing reliance on Generative AI in test processes.","Managing LLMs effectively across their lifecycle, including privacy, security, and cost considerations.","Limiting LLM usage to chatbot-based testing solutions to reduce complexity.","Fully automating all test tasks without requiring human oversight."],"correct":[1],"exp_count":1,"exp":"b) Is correct. LLMOps manages LLMs across their lifecycle, addressing privacy, security, and cost for testing.\n\nWhy not the others?\n\na) Is not correct. Large Language Model Operations (LLMOps) focuses on managing LLMs, not preventing their use.\nc) Is not correct. LLMOps applies to various applications, not just chatbotbased solutions.\nd) Is not correct. LLMOps does not aim to fully automate all test tasks or eliminate human oversight."},{"q":"Which of the following statements about shadow AI is CORRECT?","opts":["Shadow AI enforces compliance with organizational data policies and general AI regulations.","Shadow AI eliminates the need for clear licensing agreements in AI tools.","Shadow AI reduces the risk of intellectual property disputes.","Shadow AI may lead to unauthorized access to sensitive information."],"correct":[3],"exp_count":1,"exp":"d) Is correct. Unapproved AI tools often lack robust security measures, increasing the risk of data breaches or unauthorized access.\n\nWhy not the others?\n\na) Is not correct. Using unapproved GenAI tools does not enforce organizational data policies.\nb) Is not correct. Shadow AI introduces risks related to unclear licensing agreements, which can lead to intellectual property disputes.\nc) Is not correct. Shadow AI increases, rather than reduces, the risk of intellectual property disputes."},{"q":"What is a key aspect to consider when defining a Generative AI strategy for software testing?","opts":["Prepare training programs designed to ensure that team members obtain certifications specific to each LLM they use.","Select LLMs that can be integrated appropriately with existing test environments and test tools.","Ensure that as much input data as possible is available to increase the likelihood of obtaining effective LLM outputs.","Collect standard supervised machine learning metrics to evaluate the effectiveness of LLM outputs."],"correct":[1],"exp_count":1,"exp":"b) Is correct. Selecting LLMs that are compatible with existing test infrastructure and scalability requirements is a critical aspect of a GenAI strategy. Test tools and test environments are fundamental elements of test infrastructure.\n\nWhy not the others?\n\na) Is not correct. While certifications for specific LLMs could be useful (and more available in the future), training programs should be designed to ensure that test team members have the technical skills necessary to use GenAI tools effectively.\nc) Is not correct. Data quality and structured, secure input are crucial for achieving reliable results with GenAI. It is necessary to have an adequate amount of high-quality input data (according to the test objectives), not as much data as possible.\nd) Is not correct. Section 5.1.2 states that a GenAI strategy for software testing must collect task-specific metrics to measure the effectiveness of LLM outputs. The metrics listed in section 2.3.1 (e.g., relevance, execution-success rate, time efficiency) are tailored to test tasks rather than borrowed wholesale from classical supervised-machine learning."},{"q":"Which of the following statements BEST describes one of the key criteria to select an appropriate LLM for specific test tasks within a test organization?","opts":["A key selection criterion involves evaluating the LLM's performance for the test tasks against the publicly available benchmarks for LLMs in code generation.","A key selection criterion involves evaluating recurring costs such as those associated with the computational resources required to run the LLM.","A key selection criterion involves evaluating the LLM against the publicly available community benchmarks to ensure full compatibility with them.","A key selection criterion involves evaluating recurring costs such as those associated with a proof of concept aimed at demonstrating the suitability of an LLM for the test tasks."],"correct":[1],"exp_count":1,"exp":"b) Is correct. A key selection criterion involves evaluating recurring costs (see the syllabus in section 5.1.3), and this answer refers to a typical example of a recurring cost.\n\nWhy not the others?\n\na) Is not correct. A key selection criterion involves evaluating the model's performance for the test tasks against the organization's benchmarks using metrics such as those presented in the syllabus itself (see the syllabus in section 5.1.3). The specified criterion may be relevant if the test tasks involve code generation. However, it is not applicable for other types of test tasks.\nc) Is not correct. Evaluating the LLM against the publicly available community benchmarks to ensure full compatibility with them is not one of the key criteria mentioned in the syllabus (in section 5.1.3) for selecting an appropriate LLM for specific test tasks.\nd) Is not correct. A key selection criterion involves evaluating recurring costs (see the syllabus in section 5.1.3), but this answer refers to a typical example of a non-recurring cost."},{"q":"What are the key phases in the adoption of Generative AI in a test organization?","opts":["Discovery, initiation and usage definition, utilization and iteration","Awareness, usage prioritization, performance monitoring","Planning, experimentation, evaluation and refinement","Training, testing, implementation, and scaling"],"correct":[0],"exp_count":1,"exp":"The syllabus describes the following 3 key phases:\n• Discovery\n• Initiation and usage definition\n• Utilization and iteration (GenAI-5.1.4 K1)\n\nThus:\na) Is correct: explicitly mentions all these 3 key phases.\nb) Is not correct: does not explicitly mention any of these 3 phases (only mentions some objectives and/or activities associated with these phases).\nc) Is not correct: does not explicitly mention any of these 3 phases (only mentions some objectives and/or activities associated with these phases).\nd) Is not correct: does not explicitly mention any of these 3 phases (only mentions some objectives and/or activities associated with these phases)."},{"q":"Which of the following options BEST refers to an example of knowledge and/or skills required for testers to work effectively with LLMs in test processes?","opts":["Mastering techniques specifically aimed at preventing LLMs from hallucinating and incurring reasoning errors when performing specific test tasks.","Selecting and implementing suitable test automation approaches, such as keyword-driven test automation, for automating test processes.","Choosing the most appropriate LLM based on criteria such as its capability to be adapted or customized to perform specific test tasks.","Ensuring the validation and test data used in the development of the LLMs are of the highest quality."],"correct":[2],"exp_count":1,"exp":"c) Is correct. The syllabus names \"assessing LLM capabilities\" as a key competency. This naturally entails comparing candidate models and selecting the one that can be adapted or fine-tuned for particular test tasks. That is a direct example of knowledge/skills testers need to work effectively with LLMs.\n\nWhy not the others?\n\na) Is not correct. Hallucinations in LLMs are intrinsic challenges with current AI technologies, and testers cannot prevent hallucinations and reasoning errors from occurring. Instead, testers should be able to (identify and) mitigate the risks of hallucinations and reasoning errors (and also biases) when testing with GenAI.\nb) Is not correct. While proficiency in test automation is valuable, it does not specifically address the integration of GenAI into test processes.\nd) Is not correct. It refers to skills that are required for developing the LLMs such as AI researchers and data scientists, not testers using GenAI for test tasks. These testers focus on ensuring the quality of the test data they directly use when interacting with LLMs."},{"q":"What is the BEST approach for cultivating skills within test teams to specifically support the adoption of Generative AI?","opts":["Rely mainly on external expert courses with hands-on practice, aiming to integrate AI into all daily test tasks at once.","Encourage independent experimentation with various LLMs without following a structured process.","Adopt a hands-on, gradual learning process supported by guided exercises, peer learning, and knowledge-sharing communities.","Rely mainly on theoretical courses from external experts, aiming to gradually integrate AI into daily test tasks in line with actual learning."],"correct":[2],"exp_count":1,"exp":"c) Is correct. Focusing on developing practical skills through structured activities, peer learning, and knowledge-sharing communities is a recommended approach.\n\nWhy not the others?\n\na) Is not correct. While external expert courses with hands-on practice can be beneficial, mainly relying on them diminishes the importance of internal practice and community building. Moreover, integrating AI into all daily test tasks in a 'big-bang' fashion is not recommended.\nb) Is not correct. While experimentation is part of the learning process, independent experimentation without structure may not lead to consistent or effective skill development.\nd) Is not correct. Mainly relying on theoretical courses from external experts diminishes the importance of developing practical skills and know-how through sharing within the organization."},{"q":"Which of the following answers BEST describes how the roles and responsibilities of testers and test managers within a test organization are impacted by the adoption of Generative AI for software testing?","opts":["Testers shift their focus from manually designing test cases to guiding and verifying AI-generated testware.","Test managers shift their focus from managing test projects to understanding the inner working of Generative AI technologies.","Testers shift their focus from manually designing test cases to overseeing AI-based test processes.","Test managers shift their focus from relying on people to relying solely on Generative AI to boost productivity in test tasks."],"correct":[0],"exp_count":1,"exp":"a) Is correct. Testers evolve into AI-assisted testing specialists, refining prompts and verifying AI outputs.\n\nWhy not the others?\n\nb) Is not correct. The responsibilities of the test managers are updated to include the development of an AI-based test strategy, AI-based risk management, and oversight of AI-based test processes. Thus, their focus is still on test management and not on understanding the (technically complex) inner workings of GenAI technologies.\nc) Is not correct. Testers do not shift their focus to overseeing AI-based test processes. This oversight is the responsibility of test managers.\nd) Is not correct. Test managers must balance human and AI capabilities to achieve efficient results."}]};
const CTAI_PACKS={"1":[{"q":"Which of the following statements provides the BEST example of the 'AI Effect'?","opts":["People lose their jobs as AI-based systems perform their roles cheaper and better","Competitive computer games lose popularity as AI-based systems always win","Rule-based expert systems for medical diagnosis are no longer considered to be AI","People believe AI will take over the world, as shown in films"],"correct":[2],"exp_count":1,"exp":"c) is correct. The 'AI Effect' is defined as the change in the definition of AI as technology advances. Rule-based systems for medical diagnosis were popular examples of AI in the 1970s and 1980s but are often not considered AI today.\n\nWhy not the others?\n\na) People in many occupations may lose their jobs to AI-based systems, but this is simply progress, not the 'AI Effect'.\nb) For some computer games, AI-based systems can outplay humans, but there is little evidence of a drop-off in the popularity of such games.\nd) The gullibility of cinema goers believing killer robots will take over the world is not the 'AI Effect'."},{"q":"Which of the following options is NOT a technology used to implement AI?","opts":["Support vector machine","Decision tree","Evolutionary reasoning","Bayesian optimization"],"correct":[2],"exp_count":1,"exp":"c) is correct. There is no such AI technology as evolutionary reasoning. Such a term is occasionally used in discussions related to biological evolution-based rules considered AI a few decades ago.\n\nWhy not the others?\n\na) Support vector machines are a form of machine learning.\nb) Decision trees are a form of machine learning.\nd) Bayesian optimization is a form of machine learning."},{"q":"Which of the following statements about the hardware used to implement AI-based systems is MOST likely to be CORRECT?","opts":["The processors used to train a mobile recommendation system must be the same as the processors on the mobile phone","Graphical processing units (GPUs) are a reasonable choice to implement an AI-based computer vision system","Deep learning systems need to be trained, evaluated, and tested using AI-specific chips","It is always best to choose processors with more bits to achieve sufficient accuracy for AI-based systems"],"correct":[1],"exp_count":1,"exp":"b) is correct. GPUs are designed for the parallel processing of images using thousands of cores, which is close to what is required for an AI-based computer vision system that would most likely be implemented as a neural network.\n\nWhy not the others?\n\na) The two activities of training a ML model and inference from that model are quite different so there is normally no reason that they should be performed on the same processors.\nc) It is still possible to train, evaluate and test a simple deep-learning system on a PC with limited GPU support — so specific chips for AI are not needed, but they would be far faster.\nd) Many AI-based systems are not focused on exact calculations, but rather on probabilistic determinations and so the accuracy of processors with many bits is often unnecessary."},{"q":"There are a number of good quality pre-trained models available in the market and you want to use one of them for an image-based classifier. You have decided to ask the provider of the model about the data used for training the model and its format. Which of the following statements is the BEST example of a risk that you are trying to mitigate by asking these questions?","opts":["Bad classification accuracy of the pre-trained models","Differences in the data used to train the model and the operational data","Performance efficiency issues of the pre-trained model","Lack of explainability of the pre-trained model compared to that of a model trained by you"],"correct":[1],"exp_count":1,"exp":"b) is correct. The data used to train the model should be similar to the data used for making the predictions.\n\nWhy not the others?\n\na) The question mentions the good quality of the pre-trained model, so this risk should be negligible.\nc) Performance does not appear to be an issue in this situation.\nd) Explainability does not appear to be an issue in this situation, nor can it be achieved by looking at the training data and its format."},{"q":"Which of the following statements is MOST likely to be specifying a requirement for autonomy in an AI-based system?","opts":["The system shall maintain a safe distance to other vehicles until the brake or accelerator is pushed by the driver","The system shall learn the preferred style of response to emails by remotely monitoring the email traffic","The system shall compare its predictions of house prices with actual selling prices to determine if it needs to be retrained","It shall be possible to modify the system's behavior to work with different types of users in less than a day"],"correct":[0],"exp_count":1,"exp":"a) is correct. This requirement defines the human interventions that define the end of the system working autonomously.\n\nWhy not the others?\n\nb) This requirement is specifying a required function for how the system shall perform self-learning.\nc) This requirement is specifying how the system will manage concept drift, in this case most likely caused by the house market changing.\nd) This is specifying an adaptability requirement — the maximum time it should take to make a change to the system."},{"q":"Which of the following statements about bias in AI-based systems is NOT correct?","opts":["Bias may be caused by users of a book recommendation system making choices that deliberately cause the system to make poor suggestions","Bias may be caused in the employee age of death prediction system by collecting the training data from a dataset of patients who are all retired","Bias may be caused in the creditworthiness system by using training data obtained from those who own and use a credit card","Bias may be caused in the navigation system by using a route planning algorithm that is too complex to be explained to typical users"],"correct":[3],"exp_count":1,"exp":"d) is correct. If the algorithm cannot be explained, then it lacks explainability, but that does not mean it is biased nor unbiased.\n\nWhy not the others?\n\na) Bias can be caused by users deliberately poisoning the self-learning of an AI-based system.\nb) Bias can be caused when the training data does not correctly match those who the system will be applied to. For instance, employees will typically be younger than retired patients.\nc) Bias can be caused when the training data does not correctly match those who the system will be applied to. For instance, most people using credit cards are already considered creditworthy, which is a typical example of sample bias."},{"q":"Which of the following is MOST likely to be an example of reward hacking?","opts":["The programmer's assistant tool optimizes the code to provide reduced response times, while still ensuring that functional requirements are met","An anesthetic supply device with a goal of keeping patients stable during surgery supplies too many doses and patients do not wake up as quickly as expected","The third-party development organization paid their AI programmers based on the number of lines of code they write","A type of AI used to play competitive computer games against humans that is focused on getting the highest score"],"correct":[1],"exp_count":1,"exp":"b) is correct. This could be 'reward hacking' if the system achieves one goal to the detriment of others, in this case the need for patients to wake up.\n\nWhy not the others?\n\na) It appears that the tool is achieving its two goals with there being no detrimental effects, so this is unlikely to be 'reward hacking'.\nc) Reward hacking is not a form of paying AI developers.\nd) Some game-playing AI-based systems are driven by a reward function, but this is not known as 'reward hacking'."},{"q":"Given the following attributes for an AI-based system (I-V):\n\nI. Probabilistic\nII. Explicable\nIII. Unfair\nIV. Non-deterministic\nV. Deterministic\n\nWhich list of attributes below is likely to cause the MOST difficulties if the system is to be used as part of a safety-related system?","opts":["I, IV","II, IV","II, III, V","I, III, V"],"correct":[0],"exp_count":1,"exp":"a) I, IV is correct.\n\nConsidering the given attributes:\nI. Probabilistic — a definite problem for safety-related systems as this causes non-determinism.\nII. Explicable — normally needed for safety-related systems.\nIII. Unfair — not ideal, but sometimes unavoidable — and not a special problem for safety-related systems.\nIV. Non-deterministic — a definite problem for safety-related systems.\nV. Deterministic — normally needed for safety-related systems.\n\nTherefore, I and IV are the attributes that are MOST problematic for safety-related systems.\n\nWhy not the others?\n\nb), c), d) — Include attributes that are actually desirable for safety-related systems."},{"q":"Which of the following statements BEST describes classification and regression as part of supervised learning?","opts":["Regression is checking that the ML model test results do not change when the same test data is executed","Classification is the assignment of unlabeled data into predefined classes","Classification is the labelling of the data before training the ML model","Regression is predicting the number of classes that are output by the ML model"],"correct":[1],"exp_count":1,"exp":"b) is correct. Classification is when input data to a ML model is classified into one of a few predefined classes.\n\nWhy not the others?\n\na) Regression in the context of supervised learning is generally when the ML model outputs a numeric result.\nc) Training data needs to be labelled for training in supervised learning, but this activity is not known as classification. It is simply labelling.\nd) Regression is when the output from the ML model is numeric, but the output is not a number of classes."},{"q":"Which of the following options BEST describes an example of reinforcement learning?","opts":["The mobile game app updates its feedback, response timing and the number of user options it provides based on how much the players spend","The language translation app searches the internet to find text provided in multiple languages to improve its translation function","The factory quality control system uses video cameras and audio analysis to identify manufactured items that are faulty based on monitoring a human quality control operative","The software component test prediction system uses a range of quality measures to identify which components are likely to contain the most defects"],"correct":[0],"exp_count":1,"exp":"a) is correct. The amount spent can be considered the reward function for this system, with the system changing its behavior to increase the amount spent.\n\nWhy not the others?\n\nb) The app is using text in what can be considered a source language and a 'correct' translation of this source. Therefore, it is relying on a form of supervised learning with no reward function mentioned.\nc) The system is using the human quality control operative as a form of 'gold' standard and so is relying on a form of supervised learning.\nd) There is no suggestion that any reward function is used, instead it is most likely that the prediction system bases its determination of defects on past experience. Therefore, it is probably also relying on a supervised learning system."},{"q":"You have been asked for your opinion on the ML approach to be used for a new system that is part of the traffic management for a SMART city. The idea is that the new system will control the traffic lights in the city to ensure traffic flows easily through and around the city. Which of the following approaches do you expect MOST likely to succeed?","opts":["Unsupervised learning that is based on identifying clusters around the city where the traffic density is higher than average","A supervised learning regression solution based on thousands of journeys labelled with both journey length and duration","Reinforcement learning that is based on a reward function that penalizes solutions that result in higher levels of traffic congestion","A supervised learning classification solution that is based on drivers and passengers submitting their favorite routes for traversing the city"],"correct":[2],"exp_count":1,"exp":"c) is correct. A continually improving reinforcement learning system with a reward function based on lower levels of congestion as a measure of success is valid for this type of system.\n\nWhy not the others?\n\na) It should be possible for the unsupervised learning system to identify areas that are congested, but this alone will not provide the solution.\nb) A regression solution is unlikely to provide us with what we want as the predicted speed of individual journeys will not provide an overall solution to citywide congestion.\nd) This solution is dependent on volunteers submitting subjective opinions that will most likely result in a solution that changes back and forth as the system adopts favorite routes that then become congested.","points":2},{"q":"When performing testing of a trained model, an ML engineer found that the model was highly accurate when evaluated with validation data but that it performed poorly with independent test data. Which of the following options is MOST likely to cause this situation?","opts":["Underfitting","Concept drift","Overfitting","Poor acceptance criteria"],"correct":[2],"exp_count":1,"exp":"c) is correct. The bad performance on test data and good on validation data suggests overfitting.\n\nWhy not the others?\n\na) The model performs well on validation data, so it is not a case of underfitting.\nb) Concept drift refers to changes after the model training and validation stage.\nd) Poor acceptance criteria should be consistent with different sets of data, so are unlikely to lead to a difference between the test results with validation data and independent test data."},{"q":"Which of the following is an example of a challenge that is likely to be encountered in the course of developing and testing an ML solution?","opts":["Data anonymization operations typically require knowledge of various ML algorithms","The data used might be unstructured data","A large percentage of the budget gets spent just in data preparation","The data pipeline scalability is a challenge when training the model"],"correct":[2],"exp_count":1,"exp":"c) is correct. Up to 36% of ML workflow effort may be spent in data preparation.\n\nWhy not the others?\n\na) Data anonymization operations do not require knowledge of ML algorithms.\nb) Unstructured data is not a challenge. Images, audio, free-flowing text are all examples of unstructured data.\nd) Scalability typically is a requirement at deployment, rather than when training."},{"q":"The data scientist has complained that the model cannot be trained with one particular algorithm, although other algorithms work with the same training data. Which of the following options is the MOST likely reason for this?","opts":["Wrong data","Missing data","Badly labelled data","Insufficient data"],"correct":[3],"exp_count":1,"exp":"d) is correct. Since models based on some learning algorithms can be trained with the data. However, if it does not work for one particular algorithm, it is MOST likely to be that the quantity of the data is not sufficient for that particular algorithm.\n\nWhy not the others?\n\na) Since models based on some learning algorithms can be trained with the data but not one particular algorithm, it indicates that the data is correct.\nb) Since models based on some learning algorithms can be trained with the data but not one particular algorithm, it indicates that there is no missing data.\nc) Since models based on some learning algorithms can be trained with the data but not one particular algorithm, it indicates that the data is correctly labelled."},{"q":"DataSure is a start-up with a product that promises to improve the quality of ML models. DataSure claim that this improvement comes from checking if the data has been labeled correctly. Which of the following defects is MOST likely to have been prevented by using this product?","opts":["The model will have security vulnerabilities","The model will have poor accuracy","The model will not fulfill its intended function","The model will produce biased outputs"],"correct":[1],"exp_count":1,"exp":"b) is correct. Mislabeled data results in reduced accuracy of the ML model.\n\nWhy not the others?\n\na) Data privacy and security issues are not being handled. Hence the product is not going to prevent security issues.\nc) A model not being fit for purpose arises from incorrect or unfair data, not mislabeled data.\nd) A biased model results from incomplete data, unbalanced data, unfair data, data lacking diversity, or duplicate data, rather than from mislabeled data."},{"q":"An ML engineer, upon finding insufficient training data, is rotating labeled images to create additional training data. Which of the following approaches to labeling is being applied in this above example?","opts":["Crowdsourcing","Augmentation","AI-based labeling","Outsourcing"],"correct":[1],"exp_count":1,"exp":"b) is correct. Augmentation is being performed here by transforming existing labelled data.\n\nWhy not the others?\n\na) Crowdsourcing is when you use a large number of people to provide some work. In this case only one person is performing the task.\nc) AI is not being used for labeling of the data.\nd) The ML engineer has not outsourced the task to a third party."},{"q":"The confusion matrix for an image classifier is shown below.\n\n{{table:0}}\n\nWhich of the following options represents the precision of the classifier?","opts":["20/120 *100","78/120 *100","78/100 *100","22/100 *100"],"correct":[2],"exp_count":1,"exp":"c) is correct. The formula for Precision = TP / (TP+FP) *100 = 78/(78+22) = 78/100 *100.\n\nWhy not the others?\n\na), b), d) — See option c for the correct formula and calculation.","tables":["<div class=\"q-table-wrap\"><table class=\"q-table\"><thead><tr><th>Confusion Matrix</th><th>Actual Positive</th><th>Actual Negative</th></tr></thead><tbody><tr><th>Predicted Positive</th><td>78</td><td>22</td></tr><tr><th>Predicted Negative</th><td>6</td><td>14</td></tr></tbody></table></div>"],"points":2},{"q":"ThermalSpace is a solution provider that helps thermal power plants to optimize their power output. Their solution is based on an ML model created using past data with clearly marked output. The model helps determine the amount of electricity to be generated at a given time of the day. To determine the quality of the model using ML functional performance metrics, which of the following metrics is MOST likely to be used?","opts":["R-squared","Precision","Recall","False Positives"],"correct":[0],"exp_count":1,"exp":"a) is correct. It is a supervised regression problem because the model outputs a continuous value, the amount of electricity to be generated, which uses the R-Squared or MSE/RMSE metric.\n\nWhy not the others?\n\nb), c), d) — These are metrics for classification."},{"q":"KnowYourPet is an app utilizing ML to determine whether a pet is hungry or not. It is understood that a dog is likely to be not hungry most of the time, as reflected in the training data. If the dog is mis-diagnosed as hungry then it may lead to overfeeding of the dog and this could lead to serious health issues. Which of the following metrics would you choose for determining the suitability of the model under test?","opts":["Accuracy","Precision","Recall","F1-score"],"correct":[1],"exp_count":1,"exp":"b) is correct. Precision should be used because the cost of false-positives (overfeeding the dog) is high (serious health issues).\n\nWhy not the others?\n\na) Accuracy is not useful when there is an imbalance in the expected classes and the not hungry class dominates in this case.\nc) Recall is useful when the positives should not be missed. In this case, precision is also important (see b) and hence recall alone is not very useful. F1-score is a better choice.\nd) F1-score is useful when there is an imbalance in the expected classes and when precision and recall are similarly important, but in this case precision appears to be far more important than recall.","points":2},{"q":"Which of the following options BEST describes a deep neural net?","opts":["It is comprised of a hierarchical structure of neurons with the lowest (deepest) neurons making most of the decisions","It is comprised of connected neurons where each neuron has an associated bias and each connection has an associated weight","It is made up several layers with each layer (except input and output layers) connected to each other layer and errors are propagated backwards through the network","It is made up of layers of neurons, each of which generates an activation value based on the other neurons in the same layer"],"correct":[1],"exp_count":1,"exp":"b) is correct. As with the human brain, an artificial neural network is comprised of connected neurons. To perform its calculation of an activation value, each neuron is assigned a bias and each connection is assigned a weight.\n\nWhy not the others?\n\na) A neural network does not have a hierarchical structure.\nc) A neural network is made up of several layers and errors are propagated backwards through the network, but the layers of a neural network are only connected to the next layers (not each other layer).\nd) A neural network is made up of layers of neurons, but the activation value is based on the neurons in the preceding layer (not the same layer)."},{"q":"Which of the following statements CORRECTLY describes a test coverage measure for neural networks?","opts":["Value change coverage is based on individual neurons being seen to affect the overall output of the neural network","Threshold coverage is based on neurons outputting an activation value greater than a preset value between zero and one","Neuron coverage is a measure of the proportion of neurons that are activated at any time during the testing","Sign change coverage measures the coverage of neurons that output both positive, negative and zero activation values"],"correct":[1],"exp_count":1,"exp":"b) is correct. Threshold coverage measures the proportion of neurons activated during testing with a value greater than a preset threshold value.\n\nWhy not the others?\n\na) Value change coverage is a measure of the proportion of neurons activated where their activation values differ by more than a preset change amount. It is not concerned with the overall output of the neural network.\nc) All neurons are potentially 'activated' each time a neural network is 'run', however the values output by the neurons change, which is what is measured by neuron coverage (coverage achieved by a value greater than zero).\nd) Sign change coverage is a measure of the proportion of neurons activated with both positive and negative activation values, but not zero activation values."},{"q":"Which of the following requirements for an AI-based system is MOST likely to cause a significant challenge in testing?","opts":["The system shall be more accurate than the system it is replacing","The AI component in the system shall have 100% accuracy","A human operator should be able to override the system in 1 second","The system shall mimic the human emotions of a typical game player"],"correct":[3],"exp_count":1,"exp":"d) is correct. This requirement is extremely complex to test without defining all human emotions and how the system might mimic them.\n\nWhy not the others?\n\na) This is a specific requirement with a test oracle, so should not usually cause a testing challenge.\nb) This may be a difficult requirement to achieve but should not create a testing challenge.\nc) This is a testable requirement."},{"q":"Which of the following is a factor associated with the test data that can make the testing of AI-based systems difficult?","opts":["Sourcing big data with high velocity","Sourcing data from a single source","Sourcing data separately from the data scientists","Sourcing data from public websites"],"correct":[0],"exp_count":1,"exp":"a) is correct. Sourcing data for AI systems that use large quantities of high-velocity data can be difficult.\n\nWhy not the others?\n\nb) Sourcing consistent data from multiple sources can be difficult.\nc) Sourcing data separately is good practice as it prevents common failures with the data scientists.\nd) Sourcing data from public websites is straightforward."},{"q":"Why would the accuracy of human decisions be considered in testing as well as the accuracy of AI-based systems?","opts":["Intuitive human decisions can be made faster than a corresponding AI-based system in some situations","Unethical decisions can be made by humans as well as AI-based systems","The accuracy of human decisions is not relevant to testing AI-based systems","Human decisions may be of lower quality when they have been recommended by an AI-based system"],"correct":[3],"exp_count":1,"exp":"d) is correct. Human decisions supported by recommendations by AI-based systems may be of lower quality than human decisions without recommendations from a system, and this should be considered in testing.\n\nWhy not the others?\n\na) Speed of decision making is not related to accuracy.\nb) The ethical choices made by humans are not related to testing AI-based systems.\nc) The accuracy of human decisions is relevant as systems may make recommendations that humans approve or review."},{"q":"An ML-based toll charging solution determines the type of incoming vehicles from the images captured by a camera. There are different types of cameras available and the solution provider claims to be able to use cameras of different resolutions. The images need to be in jpeg format with a size of 320X480 pixels for the purpose of training the model as well as for predicting the outcome. The model should be able to classify the vehicle types with a certain desired high level of accuracy and should be tested against vulnerabilities. Each toll plaza will have its own complete system unconnected to any other system. Which of the following types of testing are the MOST appropriate options for the tests you would choose for system testing?","opts":["Testing for concept drift","Adversarial testing","Scalability testing","Fairness testing","Data pipeline testing"],"correct":[1,4],"exp_count":2,"exp":"b) and e) are correct.\n\nb) Adversarial testing is important because the requirements state that the system should be tested against vulnerabilities.\ne) Data pipeline testing is required because the images can come in various formats and resolutions. For the model to be trained all images should have same format hence this testing is important.\n\nWhy not the others?\n\na) Concept drift is tested after deployment.\nc) Scalability testing has not been mentioned as one of the requirements. These are independent systems and are not connected to any other systems.\nd) Fairness is using positively biased data for training. Since there is no case of positive discrimination here, fairness testing is not relevant.","points":2},{"q":"Which of the following statements BEST describes a testing challenge that specifically applies to a self-learning system?","opts":["The system requires regular retraining and therefore requires regular testing","The system is regularly released which means regression testing is required","The system changes in such a way that tests that previously passed can fail","The system requires a human operator, who is also required for testing"],"correct":[2],"exp_count":1,"exp":"c) is correct. Tests on a system that makes changes to itself may start to fail, even if they previously passed.\n\nWhy not the others?\n\na) A system that requires regular retraining cannot be described as self-learning.\nb) A system that needs to be regularly released cannot be described as self-learning as it requires frequent releases to adapt to change.\nd) A system that requires a human operator is unlikely to be self-learning."},{"q":"Which of the following is NOT likely to be required to test a system for bias?","opts":["Involving selected users that are known to be biased","Measuring how changes in test inputs change test outputs","Observing how production outputs correlate to production inputs","Obtaining additional data from other sources"],"correct":[0],"exp_count":1,"exp":"a) is correct. Users that exhibit bias are not required to test a system for bias because they do not help to determine whether the behavior of the system is biased.\n\nWhy not the others?\n\nb) Measuring how test inputs change test outputs is important when testing for bias as it can show how the system is biased towards or against particular inputs.\nc) Measuring how production inputs change production outputs is important when testing for bias because different results might be seen in production.\nd) Obtaining external data sources can be essential when testing for bias in case the bias is based on \"hidden\" variables."},{"q":"Which of the following statements BEST describes how system complexity can create challenges when testing an AI-based system?","opts":["Testing for bias may require data that the team does not have","Manual generation of white-box tests can be difficult","Determining whether a system is ethical can be subjective","It can be difficult to find representative data to train a model"],"correct":[1],"exp_count":1,"exp":"b) is correct. Understanding how the system works and creating enough tests to achieve effective coverage are challenges caused by the complexity of AI-based systems.\n\nWhy not the others?\n\na) Bias does not usually relate to system complexity.\nc) Ethics is not usually related to AI-based system complexity.\nd) Difficulty finding representative data to train a model is not related to testing or AI-based system complexity."},{"q":"An AI-based system is being used by the health ministry to identify vulnerable groups of patients, who will be provided with support and advice to help prevent them suffering future illnesses to which they may be susceptible. The results will also be shared with other government agencies and medical insurance companies. The system is initially being trained on a large set of data collected by the health ministry from two surveys of 5,000 men over 50 years of age and 25,000 women over 30 years of age. The system will continue to identify vulnerable patients by gathering information from publicly available social media. Which of the following attributes should be MOST carefully considered when specifying the objectives and acceptance criteria for the system?","opts":["Adaptability","Bias","Explainability","Flexibility","Autonomy"],"correct":[1,2],"exp_count":2,"exp":"b) and c) are correct.\n\nb) Bias — the data being used for training is biased towards women (25,000 vs 5,000) and towards specific age groups, therefore bias needs to be carefully considered.\nc) Explainability — the results may affect the identified vulnerable patients both medically and financially. They should be able to see why they have been labelled as susceptible so they can ensure they have been correctly chosen and as part of explainability requirements related to data privacy.\n\nWhy not the others?\n\na) Adaptability — is the ability of the system to be modified. There is no reason to believe the operational environment for the system will change much.\nd) Flexibility — is the ability of a system to change its behavior, but there is no reason at this point to think this system will need to be used outside the initial specification.\ne) Autonomy — is the ability of the system to work for sustained periods without human intervention. There is no reason to think that the system will have to work for sustained periods without intervention."},{"q":"An ML engineer is trying to find exploitable inputs and then use these inputs to retrain the models to make them immune to these inputs. Which of the following options BEST describes the approach being used by the ML engineer?","opts":["Validation","Adversarial testing","Data pipeline testing","Scalability testing"],"correct":[1],"exp_count":1,"exp":"b) is correct. This is an example of adversarial testing.\n\nWhy not the others?\n\na) This is not validation as the exploitable inputs are being found and retraining is being done using those inputs.\nc) There are not any data pipelines that are being tested in this situation.\nd) No scalability tests are being performed in this example."},{"q":"A test manager has to select test techniques to be used for testing autonomous vehicle software. There are a large number of environmental conditions (>50) that need to be considered for seven vehicle functions. Which of the following test techniques is MOST likely to be used when testing the variety of vehicle functions (VF) in different environmental conditions (EC)?","opts":["A/B testing based on the VF and EC parameters","Combination testing of all the parameters of VF and EC","Pairwise testing of the relevant values of VF and EC","Back-to-back testing of relevant VF and EC values"],"correct":[2],"exp_count":1,"exp":"c) is correct. Pairwise testing is best suited to reduce the number of combinations without sacrificing defect detection too much.\n\nWhy not the others?\n\na) A/B testing is not useful for combinatorial testing.\nb) All combinations would be almost impossible to do in practice, resulting from the near infinite number of potential combinations.\nd) Back-to-back testing is not useful for combinatorial testing."},{"q":"A test manager decides to have a non-AI system with similar functionality to the AI based system under test (SUT) built to support system testing. Which of the following statements is most likely to be CORRECT?","opts":["The test manager has chosen back-to-back testing because it helps solve the test oracle problem by using a pseudo-oracle","The test manager has chosen A/B testing because it helps solve the test oracle problem by using a pseudo-oracle","The test manager has chosen back-to-back testing because the non-functional requirements of the SUT can be verified against the pseudo-oracle","The test manager has chosen A/B testing because the non-functional requirements of the SUT can be verified against the pseudo-oracle"],"correct":[0],"exp_count":1,"exp":"a) is correct. It is an example of back-to-back testing where the non-AI system is used as a pseudo-oracle.\n\nWhy not the others?\n\nb) With A/B testing, we use a variant of the SUT to compare with the SUT.\nc) The resources and non-functional characteristics of the pseudo-oracle and the SUT are likely to be different, hence the alternate system cannot be used for non-functional testing.\nd) It is an example of back-to-back testing and also the resources and non-functional characteristics of the pseudo-oracle and the SUT are likely to be different."},{"q":"An AI-based mobile phone search system provides a list of phones that it believes are most suitable for the user based on its knowledge of the user's previous mobile phone usage and their specified preferences. Given that metamorphic testing is being used with the following source test case:\n\n{{table:0}}\n\nAnd this test data for two corresponding follow-up test cases:\n\n{{table:1}}\n\nWhich of the following options is MOST likely to be a valid list of recommended phones for the follow-up test cases?","opts":["T1: SnapHappy_X1, SnapHappy_M2\nT2: ClickNow_1000x, ClickNow_1000xs","T1: SnapHappy_M2, SnapHappy_M3, ClickNow_1000xs\nT2: SnapHappy_X1, ClickNow_1000x","T1: SnapHappy_X1, SnapHappy_M2, SnapHappy_M3, ClickNow_1000x, ClickNow_1000xs\nT2: SnapHappy_X1, SnapHappy_M2, SnapHappy_M3","T1: SnapHappy_X1, SnapHappy_M2, SnapHappy_M3, ClickNow_1000x, ClickNow_1000xs\nT2: SnapHappy_X1, SnapHappy_M2, SnapHappy_M3, ClickNow_1000x, ClickNow_1000xs"],"correct":[1],"exp_count":1,"exp":"b) is correct.\n\nFollow-up test case T1 differs from the source test case by the change in requirements for a 3D camera; it is now more specific. A 3D camera must be included. So, that means the follow-up expected results can only include the original test results at most (the previously recommended phones with a 3D camera).\n\nFollow-up test case T2 also differs from the source test case by the change in requirements for a 3D camera; it is also more specific. No 3D camera should be included. So, that means the follow-up expected results can only include the original test results at most (the previously recommended phones without a 3D camera).\n\nAs T1 lists phones with a 3D camera, the remaining phones from the source test case must be those with no 3D camera — and so they should be in T2.\n\nTherefore, T1 and T2 combined should contain all the cameras from the source test case, but with no overlap between the two.\n\nWhy not the others?\n\na), c), d) — Do not satisfy the metamorphic relation.","tables":["<div class=\"q-table-wrap\"><table class=\"q-table\"><thead><tr><th colspan=\"2\">Inputs</th><th>Outputs</th></tr></thead><tbody><tr><td>Selected price range:</td><td>$200-$300</td><td><strong>Recommended Phones:</strong></td></tr><tr><td>3D camera:</td><td>Don't care</td><td>SnapHappy_X1</td></tr><tr><td>Screen size:</td><td>mid to large</td><td>SnapHappy_M2</td></tr><tr><td>OS:</td><td>Android or iOS</td><td>SnapHappy_M3</td></tr><tr><td>Battery Life:</td><td>Don't care</td><td>ClickNow_1000x</td></tr><tr><td></td><td></td><td>ClickNow_1000xs</td></tr></tbody></table></div>","<div class=\"q-table-wrap\"><table class=\"q-table\"><thead><tr><th colspan=\"2\">Input T1</th></tr></thead><tbody><tr><td>Selected price range:</td><td>$200-$300</td></tr><tr><td>3D camera:</td><td>yes</td></tr><tr><td>Screen size:</td><td>mid to large</td></tr><tr><td>OS:</td><td>Android or iOS</td></tr><tr><td>Battery Life:</td><td>Don't care</td></tr><tr class=\"q-table-section\"><th colspan=\"2\">Input T2</th></tr><tr><td>Selected price range:</td><td>$200-$300</td></tr><tr><td>3D camera:</td><td>no</td></tr><tr><td>Screen size:</td><td>mid to large</td></tr><tr><td>OS:</td><td>Android or iOS</td></tr><tr><td>Battery Life:</td><td>Don't care</td></tr></tbody></table></div>"],"points":2},{"q":"System testing of an AI-based system is being planned. It has been suggested that exploratory testing is used in addition to scripted test techniques. Which of the following scenarios is MOST likely to be an example of exploratory testing being performed?","opts":["Training data is visualized using tools to look at various aspects of the data","Tests written using equivalence partitioning during the previous test cycle are being run","The Google 'ML test checklist' is being used","ML functional performance metrics are being calculated"],"correct":[0],"exp_count":1,"exp":"a) is correct. This is Exploratory Data Analysis which is an exploratory method.\n\nWhy not the others?\n\nb) This is scripted testing.\nc) This is checklist-based testing.\nd) Calculating ML functional performance metrics is not exploratory testing."},{"q":"LAIgal systems has an AI-based product for extracting relevant favorable judgements similar to a given legal case. This product is used by judges in the courts. Details of the current case are provided, and the system produces relevant judgements. The system needs to be safe from malicious inputs. A similar open-source product exists and is available. Not having a suitable test oracle is a challenge when testing. Which of the following test techniques should be selected to test the new version during system testing?","opts":["A/B testing","Back-to-back testing","Adversarial testing","State transition testing","ML functional performance metrics calculation"],"correct":[1,2],"exp_count":2,"exp":"b) and c) are correct.\n\nb) Back-to-back testing uses a similar product as a pseudo-oracle for testing.\nc) Adversarial testing is important here as it is being used for a very important purpose and adversarial data can cause harm.\n\nWhy not the others?\n\na) A/B testing is most useful when comparing two variants for the purpose of deciding if the new variant is an improvement over the older variant.\nd) While state transition testing might be useful, nothing in the scenario suggests it; therefore, it is not the most relevant technique.\ne) This testing is appropriate at the model testing stage for classification problems. It is not appropriate at the system testing stage for non-classification problems.","points":2},{"q":"Which one of the following statements is an example of a difference between a test environment for AI-based systems and a test environment for conventional systems?","opts":["Test environments for AI-based systems may require some mechanism to determine how a particular decision is made","Test environments for AI-based systems need simulators and virtual environments whereas conventional systems do not need these","Test environments for AI-based systems need large amounts of data, whereas conventional systems do not need large amount of data","GPUs are required for test environments for AI-based systems whereas conventional systems do not need these"],"correct":[0],"exp_count":1,"exp":"a) is correct. Explainability mechanism may need to be provided for AI environments.\n\nWhy not the others?\n\nb) Simulators and virtual environments are often required for conventional systems.\nc) Large amount of data may be required for conventional systems, as well.\nd) GPUs may be required for many other systems as well, for example, games."},{"q":"In which of the following situations would AI be MOST useful when categorizing new defects?","opts":["A small number of defects requires categorization on a new application","A large number of defects is reported on a small application","Minimal data is provided in typical defect reports","A new development team needs to know the most appropriate developer to fix a defect"],"correct":[1],"exp_count":1,"exp":"b) is correct. Where a large number of defects is reported on a small application there is most likely to be benefit and opportunity to identify duplicates.\n\nWhy not the others?\n\na) Where a small number of defects requires categorization and there is no historical data, AI would not have training data to be used.\nc) Where minimal data is provided in the defect reports, the usefulness of the tool will be lower, as less data will be available to the algorithm.\nd) For AI to recommend developers to fix defects it would need to be based on historical data. However, because a new development team is taking over, any recommendations would be inaccurate until historical data is available."},{"q":"Which of the following is an AI tool MOST likely to use as the basis for generating functional test cases?","opts":["A test charter","A picture of the system as a flow chart","Web server logs","Crash reports"],"correct":[2],"exp_count":1,"exp":"c) is correct. Web server logs may reflect production use of the system and provide a way for AI to generate tests.\n\nWhy not the others?\n\na) A test charter provides a focus for exploratory testing, and it rarely leads to the generation of test cases, even if an AI-based tool could interpret it.\nb) A flow chart could be used to generate tests, but it needs to be machine readable, rather than simply a picture.\nd) Crash reports are unlikely to be used as they would describe unexpected failures rather than the functions performed by the application."},{"q":"Which of the following options CORRECTLY states how an AI-based tool can perform optimization of regression test suites?","opts":["By analyzing false positive test results","By analyzing information from previous testing activities","By using genetic algorithms to create new test cases","By updating the expected results to counter concept drift"],"correct":[1],"exp_count":1,"exp":"b) is correct. Optimization of regression test suites is performed by analyzing information on previous test executions.\n\nWhy not the others?\n\na) The goal of regression testing optimization is to reduce the size, prioritize or augment a test suite, not to reduce false positives.\nc) Per section 11.4 of the syllabus, regression test optimization is typically performed using previous test execution data. Using genetic algorithms to create new tests is unlikely to achieve the goal of optimizing the regression test suite.\nd) It is important to consider regression testing and concept drift together, however per section 11.4 concept drift is not related to regression test optimization using AI."},{"q":"Which of the following options CORRECTLY states how an AI-based tool can perform defect prediction?","opts":["Using natural language to ask developers where they predict defects will occur","By analyzing the causes of defects raised on a similar code base","By analyzing false positive defects","Scanning code to identify defects using rules."],"correct":[1],"exp_count":1,"exp":"b) is correct. Defect prediction is performed by looking for correlations between code/process/people measures and defects on the same or a similar code base.\n\nWhy not the others?\n\na) While natural language processing is an AI application, it is not used for defect prediction.\nc) The goal of defect prediction is not to identify defects with a false positive result. To analyze them would have little value.\nd) Defect prediction does not involve scanning of code using rules. This is static analysis."}]};
const CTFL_PACKS={"1":[{"q":"Which of the following statements describe a valid test objective?","opts":["To prove that there are no unfixed defects in the system under test","To prove that there will be no failures after the implementation of the system into production","To reduce the risk level of the test object and to build confidence in the quality level","To verify that there are no untested combinations of inputs"],"correct":[2],"exp_count":1,"exp":"c) is correct. Testing finds defects and failures which reduces the level of risk and at the same time gives more confidence in the quality level of the test object.\n\nWhy not the others?\n\na) It is impossible to prove that there are no defects anymore in the system under test. See testing principle 1.\nb) See testing principle 7.\nd) It is impossible to test all combinations of inputs (see testing principle 2)."},{"q":"Which of the following options shows an example of test activities that contribute to success?","opts":["Having testers involved during various software development lifecycle (SDLC) activities will help to detect defects in work products","Testers try not to disturb the developers while coding, so that the developers write better code","Testers collaborating with end users help to improve the quality of defect reports during component integration and system testing","Certified testers will design much better test cases than non-certified testers"],"correct":[0],"exp_count":1,"exp":"a) is correct. It is important that testers are involved from the beginning of the software development lifecycle (SDLC). It will increase understanding of design decisions and will detect defects early.\n\nWhy not the others?\n\nb) Both developers and testers will have more understanding of each other's work products and how to test the code.\nc) End users will not help the testers in increasing the quality of defect reports; also, users usually do not participate in low-level testing levels like integration testing.\nd) Being certified does not automatically mean that the tester will be better in test design."},{"q":"You have been assigned as a tester to a team producing a new system incrementally. You have noticed that no changes have been made to the existing regression test cases for several iterations and no new regression defects were identified. Your manager is happy, but you are not. Which testing principle explains your skepticism?","opts":["Tests wear out","Absence-of-defects fallacy","Defects cluster together","Exhaustive testing is impossible"],"correct":[0],"exp_count":1,"exp":"a) is correct. This principle means that if the same tests are repeated over and over again, eventually these tests no longer find any new defects. This is probably why the tests all passed in this release as well.\n\nWhy not the others?\n\nb) This principle says about the mistaken belief that just finding and fixing a large number of defects will ensure the success of a system.\nc) This principle says that a small number of components usually contain most of the defects.\nd) This principle states that testing all combinations of inputs and preconditions is not feasible."},{"q":"You work in a team that develops a mobile application for food ordering. In the current iteration the team decided to implement the payment functionality. Which of the following activities is a part of test analysis?","opts":["Estimating that testing the integration with the payment service will take 8 person-days","Deciding that the team should test if it is possible to properly share payment between many users","Using boundary value analysis (BVA) to derive the test data for the test cases that check the correct payment processing for the minimum allowed amount to be paid","Analyzing the discrepancy between the actual result and expected result after executing a test case that checks the process of payment with a credit card, and reporting a defect"],"correct":[1],"exp_count":1,"exp":"b) is correct. This is an example of defining test conditions which is a part of test analysis.\n\nWhy not the others?\n\na) Estimating the test effort is part of test planning.\nc) Using test techniques to derive coverage items is a part of test design.\nd) Reporting defects found during dynamic testing is a part of test execution."},{"q":"Which of the following factors have a SIGNIFICANT influence on the test approach?\n\ni. The SDLC\nii. The number of defects detected in previous projects\niii. The identified product risks\niv. New regulatory requirements forcing formal white-box testing\nv. The test environment setup","opts":["i, ii have significant influence","i, iii, iv have significant influence","ii, iv, v have significant influence","iii, v have significant influence"],"correct":[1],"exp_count":1,"exp":"b) is correct. Statements i, iii, and iv are true.\n\ni. The SDLC has an influence on the test approach.\niii. The identified product risks are one of the most important factors influencing the test approach.\niv. Regulatory requirements are important factors influencing the test approach.\n\nWhy not the others?\n\nii. The number of defects detected in previous projects may have some influence, but this is not as significant as i, iii and iv.\nv. The test environment has no significant influence on the test approach."},{"q":"Which TWO of the following tasks belong MAINLY to a testing role?","opts":["Configure test environments","Maintain the product backlog","Design solutions to new requirements","Create the test plan","Analyze the test basis"],"correct":[0,4],"exp_count":2,"exp":"a) and e) are correct.\n\na) This is done by the testers.\ne) This is done by the testers since its technical task is done as part of a test analysis.\n\nWhy not the others?\n\nb) The product backlog is built and maintained by the product owner.\nc) This is done by the development team.\nd) This is a managerial role."},{"q":"Which of the following skills (i-v) are the MOST important skills of a tester?\n\ni. Having domain knowledge\nii. Creating a product vision\niii. Being a good team player\niv. Planning and organizing the work of the team\nv. Critical thinking","opts":["ii and iv are important","i, iii and v are important","i, ii and v are important","iii and iv are important"],"correct":[1],"exp_count":1,"exp":"b) is correct. Statements i, iii, and v are true.\n\ni. Having domain knowledge is an important tester skill.\niii. Being a good team player is an important skill.\nv. Critical thinking is one of the most important skills of testers.\n\nWhy not the others?\n\nii. This is a task of the business analyst together with the business representative.\niv. Planning and organizing the work of the team is a task of the test manager or, mostly in an Agile software development project, the whole team and not just the tester."},{"q":"How is the whole team approach present in the interactions between testers and business representatives?","opts":["Business representatives decide on test automation approaches","Testers help business representatives to define a test strategy","Business representatives are not part of the whole team approach","Testers help business representatives to create suitable acceptance tests"],"correct":[3],"exp_count":1,"exp":"d) is correct. Testers will work closely with business representatives to ensure that the desired quality levels are achieved. This includes supporting and collaborating with them to help them create suitable acceptance tests.\n\nWhy not the others?\n\na) The test automation approach is defined by testers with the help of developers and business representatives.\nb) The test strategy is decided in collaboration with the developers.\nc) Testers, developers, and business representatives are part of the whole team approach."},{"q":"Consider the following rule: \"for every SDLC activity there is a corresponding test activity\". In which SDLC models does this rule hold?","opts":["Only in sequential development models","Only in iterative development models","Only in iterative and incremental development models","In sequential, incremental, and iterative development models"],"correct":[3],"exp_count":1,"exp":"d) is correct. This rule holds for all SDLC models.\n\nWhy not the others?\n\na), b), c) — Each restricts the rule to a subset of SDLC models, which is incorrect."},{"q":"Which of the following statements BEST describes the acceptance test-driven development (ATDD) approach?","opts":["In ATDD, acceptance criteria are typically created based on the given/when/then format","In ATDD, test cases are mainly created at component testing and are code-oriented","In ATDD, tests are created, based on acceptance criteria to drive the development of the related software","In ATDD, tests are based on the desired behavior of the software, which makes it easier for team members to understand them"],"correct":[2],"exp_count":1,"exp":"c) is correct. In acceptance test-driven development (ATDD) tests are written from acceptance criteria as part of the design process.\n\nWhy not the others?\n\na) It is more often used in behavior-driven development (BDD).\nb) It is the description of test-driven development (TDD).\nd) It is used in BDD."},{"q":"Which of the following is NOT an example of the shift-left approach?","opts":["Reviewing the user requirements before they are formally accepted by the stakeholders","Writing a component test before the corresponding code is written","Executing a performance efficiency test for a component during component testing","Writing a test script before setting up the configuration management process"],"correct":[3],"exp_count":1,"exp":"d) is correct. Test scripts should be subject to configuration management, so it makes no sense to create the test scripts before this process is set up.\n\nWhy not the others?\n\na) Early review is an example of the shift-left approach.\nb) TDD is an example of the shift-left approach.\nc) Early non-functional testing is an example of the shift-left approach."},{"q":"Which of the arguments below would you use to convince your manager to organize retrospectives at the end of each release cycle?","opts":["Retrospectives are very popular these days and clients would appreciate it if we added them to our processes","Organizing retrospectives will save the organization money because without them end user representatives do not provide immediate feedback about the product","Process weaknesses identified during the retrospective can be analyzed and serve as a to do list for the organization's continuous process improvement program","Retrospectives embrace five values including courage and respect, which are crucial to maintain continuous improvement in the organization"],"correct":[2],"exp_count":1,"exp":"c) is correct. Regularly conducted retrospectives, when appropriate follow up activities occur, are critical to continual improvement of development and testing.\n\nWhy not the others?\n\na) Retrospectives are more useful for identifying improvement opportunities and have little importance for clients.\nb) Retrospectives are not aimed to collect feedback about the product, but about the process. Additionally, retrospectives are internal activity for the team and should not include end user representatives.\nd) Courage and respect are values of Extreme Programming and are not closely related to retrospectives."},{"q":"Which types of failures (1-4) fit which test levels (A-D) BEST?\n\n1. Failures in system behavior as it deviates from the user's business needs\n2. Failures in communication between components\n3. Failures in logic in the code\n4. Failures in not correctly implemented business rules\n\nA. Component testing\nB. Component integration testing\nC. System testing\nD. Acceptance testing","opts":["1D, 2B, 3A, 4C","1D, 2B, 3C, 4A","1B, 2A, 3D, 4C","1C, 2B, 3A, 4D"],"correct":[0],"exp_count":1,"exp":"a) 1D, 2B, 3A, 4C is correct.\n\n• The test basis for acceptance testing is the user's business needs (1D).\n• Communication between components is tested during component integration testing (2B).\n• Failures in logic can be found during component testing (3A).\n• Business rules are the test basis for system testing (4C).\n\nWhy not the others?\n\nb), c), d) — All have one or more incorrect mappings."},{"q":"You are testing a user story with three acceptance criteria: AC1, AC2 and AC3. AC1 is covered by test case TC1, AC2 by TC2, and AC3 by TC3. The test execution history had three test runs on three consecutive versions of the software as follows:\n\n{{table:0}}\n\nTests are repeated once you are informed that all defects found in the test run are corrected and a new version of the software is available. Which of the above tests are executed as regression tests?","opts":["Only 4, 7, 8, 9","Only 5, 7","Only 4, 6, 8, 9","Only 5, 6"],"correct":[1],"exp_count":1,"exp":"b) Only 5, 7 is correct.\n\nBecause TC1 and TC3 failed in Execution 1 (i.e., test (1) and test (3)), test (4) and test (6) are confirmation tests.\nBecause TC2 and TC3 failed in Execution 2 (i.e., tests (5) and (6)), test (8) and test (9) are also confirmation tests.\nTC2 passed in Execution 1 (i.e., test (2)), so test (5) is a regression test.\nTC1 passed in Execution 2 (i.e., test (4)), so test (7) is also a regression test.\n\nWhy not the others?\n\na), c), d) — Include confirmation tests or miss regression tests.","tables":["<div class=\"q-table-wrap\"><table class=\"q-table\"><thead><tr><th></th><th>Execution 1</th><th>Execution 2</th><th>Execution 3</th></tr></thead><tbody><tr><th>TC1</th><td>(1) failed</td><td>(4) passed</td><td>(7) passed</td></tr><tr><th>TC2</th><td>(2) passed</td><td>(5) failed</td><td>(8) passed</td></tr><tr><th>TC3</th><td>(3) failed</td><td>(6) failed</td><td>(9) passed</td></tr></tbody></table></div>"]},{"q":"Which of the following is NOT a benefit of static testing?","opts":["Having less expensive defect management due to the ease of detecting defects later in the SDLC","Fixing defects found during static testing is generally much less expensive than fixing defects found during dynamic testing","Finding coding defects that might not have been found by only performing dynamic testing","Detecting gaps and inconsistencies in requirements"],"correct":[0],"exp_count":1,"exp":"a) is correct. Defect management is no less expensive. Finding and fixing defects later in the SDLC is more costly.\n\nWhy not the others?\n\nb), c), d) — These are all benefits of static testing."},{"q":"Which of the following is a benefit of early and frequent feedback?","opts":["It improves the test process for future projects","It forces customers to prioritize their requirements based on agreed risks","It provides a measure for the quality of changes","It helps avoid requirements misunderstandings"],"correct":[3],"exp_count":1,"exp":"d) is correct. Early and frequent feedback can prevent misunderstandings about requirements.\n\nWhy not the others?\n\na) Feedback can improve the test process, but if one only wants to improve future projects, the feedback does not need to come early or frequently.\nb) Feedback is not used to prioritize requirements.\nc) There is no one, recommended way to measure quality of changes. Also, this is not one of the benefits of early feedback that are mentioned in section 3.2.1."},{"q":"The reviews being used in your organization have the following attributes:\n\n• There is the role of a scribe\n• The main purpose is to evaluate quality\n• The meeting is led by the author of the work product\n• There is individual preparation\n• A review report is produced\n\nWhich of the following review types is MOST likely being used?","opts":["Informal review","Walkthrough","Technical review","Inspection"],"correct":[1],"exp_count":1,"exp":"b) Walkthrough is correct.\n\nConsidering the attributes:\n• Specified for walkthroughs, technical reviews, and inspections; thus, the reviews being performed cannot be informal reviews.\n• The purpose of evaluating quality is one of the most important objectives of a walkthrough.\n• Author-led meetings are not allowed for inspections and are typically not done in technical reviews. A moderator is needed in walkthroughs and is allowed for informal reviews.\n• All types of reviews can include individual preparation (even informal reviews).\n• All types of reviews can produce a review report, although informal reviews do not require documentation.\n\nWhy not the others?\n\na), c), d) — Each conflicts with at least one of the listed attributes."},{"q":"Which of these statements is NOT a factor that contributes to successful reviews?","opts":["Participants should dedicate adequate time for the review","Splitting large work products into small parts to make the required effort less intense","Participants should avoid behaviors that might indicate boredom, exasperation, or hostility to other participants","Failures found should be acknowledged, appreciated, and handled objectively"],"correct":[3],"exp_count":1,"exp":"d) is correct. During reviews one can find defects, not failures.\n\nWhy not the others?\n\na) Adequate time for individuals is a success factor.\nb) Splitting work products into small adequate parts is a success factor.\nc) Avoiding behaviors that might indicate boredom, exasperation, etc. is a success factor."},{"q":"Which of the following is a characteristic of experience-based test techniques?","opts":["Test cases are created based on detailed design information","Items tested within the interface code section are used to measure coverage","The test techniques heavily rely on the tester's knowledge of the software and the business domain","The test cases are used to identify deviations from the requirements"],"correct":[2],"exp_count":1,"exp":"c) is correct. This is a common characteristic of experience-based test techniques. This knowledge and experience include expected use of the software, its environment, likely defects, and the distribution of those defects is used to define tests.\n\nWhy not the others?\n\na) This is a common characteristic of white-box test techniques. Test conditions, test cases, and test data are derived from a test basis that may include code, software architecture, detailed design, or any other source of information regarding the structure of the software.\nb) This is a common characteristic of white-box test techniques. Coverage is measured based on the items tested within a selected structure and the test technique applied to the test basis.\nd) This is a common characteristic of black-box test techniques. Test cases may be used to detect gaps within requirements and the implementation of the requirements, as well as deviations from the requirements."},{"q":"You are testing a simplified apartment search form which has only two search criteria:\n\n• floor (with three possible options: ground floor; first floor; second or higher floor)\n• garden type (with three possible options: no garden; small garden; large garden)\n\nEach of the apartments on the ground floor has a garden, apartments on higher floors don't. The form has a built-in validation mechanism that will not allow you to use the search criteria which violate this rule. Each test has two input values: floor and garden type. You want to apply equivalence partitioning (EP) to cover each floor and each garden type in your tests. What is the minimal number of test cases to achieve 100% EP coverage for valid partitions?","opts":["3","4","5","6"],"correct":[1],"exp_count":1,"exp":"b) 4 is correct.\n\nThe situation presented in the question is described in the syllabus as \"each choice\" coverage.\n\n\"Small garden\" and \"large garden\" can go only with \"ground floor\", so we need two test cases with \"ground floor\" which cover these two \"garden type\" partitions.\n\nWe need two more test cases to cover the two other \"floor\" partitions. The remaining \"garden type\" partition of \"no garden\" is covered by these tests. We need a total of four test cases:\n\nTC1 (ground floor, small garden)\nTC2 (ground floor, large garden)\nTC3 (first floor, no garden)\nTC4 (second or higher floor, no garden)\n\nWhy not the others?\n\na), c), d) — Either too few to achieve coverage or more than needed."},{"q":"You are testing a system that calculates the final course grade for a given student. The final grade is assigned based on the final result, according to the following rules:\n\n• 0 – 50 points: failed\n• 51 – 60 points: fair\n• 61 – 70 points: satisfactory\n• 71 – 80 points: good\n• 81 – 90 points: very good\n• 91 – 100 points: excellent\n\nYou have prepared the following set of test cases:\n\n{{table:0}}\n\nWhat is the 2-value boundary value analysis (BVA) coverage for the final result that is achieved with the existing test cases?","opts":["50%","60%","33.3%","100%"],"correct":[0],"exp_count":1,"exp":"a) 50% is correct.\n\nThere are 12 boundary values for the final result values: 0, 50, 51, 60, 61, 70, 71, 80, 81, 90, 91, and 100.\n\nThe test cases cover six of them (TC1 – 91, TC2 – 50, TC3 – 81, TC4 – 60, TC5 – 70, TC6 – 80). Therefore, the test cases cover 6/12 = 50%.\n\nWhy not the others?\n\nb), c), d) — Incorrect calculations of coverage.","tables":["<div class=\"q-table-wrap\"><table class=\"q-table\"><thead><tr><th></th><th>Final result</th><th>Final grade</th></tr></thead><tbody><tr><th>TC1</th><td>91</td><td>Excellent</td></tr><tr><th>TC2</th><td>50</td><td>Failed</td></tr><tr><th>TC3</th><td>81</td><td>Very good</td></tr><tr><th>TC4</th><td>60</td><td>Fair</td></tr><tr><th>TC5</th><td>70</td><td>Satisfactory</td></tr><tr><th>TC6</th><td>80</td><td>Good</td></tr></tbody></table></div>"]},{"q":"Your favorite bicycle daily rental store has just introduced a new Customer Relationship Management system and asked you, one of their most loyal members, to test it. The implemented features are as follows:\n\n• Anyone can rent a bicycle, but members receive a 20% discount\n• However, if the return deadline is missed, the discount is no longer available\n• After 15 rentals, members get a gift: a T-Shirt\n\nDecision table describing the implemented features looks as follows:\n\n{{table:0}}\n\nBased ONLY on the feature description of the Customer Relationship Management system, which of the above rules describes an impossible situation?","opts":["R4","R2","R6","R8"],"correct":[3],"exp_count":1,"exp":"d) R8 is correct. No discount as a non-member that has also missed a deadline, but only members can receive a gift T-Shirt. Hence, the action is not correct.\n\nWhy not the others?\n\na) R4: A member without a missed deadline can get a discount and a gift T-Shirt after 15 bicycle rentals.\nb) R2: A member without a missed deadline can get a discount but no gift T-Shirt until they rented a bicycle 15 times.\nc) R6: Non-members cannot get a discount, even if they did not miss a deadline yet.","tables":["<div class=\"q-table-wrap\"><table class=\"q-table\"><thead><tr><th>Conditions:</th><th>R1</th><th>R2</th><th>R3</th><th>R4</th><th>R5</th><th>R6</th><th>R7</th><th>R8</th></tr></thead><tbody><tr><th>Being a member</th><td>T</td><td>T</td><td>T</td><td>T</td><td>F</td><td>F</td><td>F</td><td>F</td></tr><tr><th>Missed deadline</th><td>T</td><td>F</td><td>T</td><td>F</td><td>T</td><td>F</td><td>F</td><td>T</td></tr><tr><th>15th rental</th><td>F</td><td>F</td><td>T</td><td>T</td><td>F</td><td>F</td><td>T</td><td>T</td></tr><tr class=\"q-table-section\"><th colspan=\"9\">Actions:</th></tr><tr><th>20% discount</th><td></td><td>X</td><td></td><td>X</td><td></td><td></td><td></td><td></td></tr><tr><th>Gift T-shirt</th><td></td><td></td><td>X</td><td>X</td><td></td><td></td><td></td><td>X</td></tr></tbody></table></div>"]},{"q":"You test a system whose lifecycle is modeled by the state transition diagram shown below. The system starts in the INIT state and ends its operation in the OFF state.\n\n{{table:0}}\n\nWhat is the MINIMAL number of test cases to achieve valid transitions coverage?","opts":["4","2","7","3"],"correct":[3],"exp_count":1,"exp":"d) 3 is correct.\n\n\"test\" and \"error\" transitions cannot occur in one test case.\nNeither can both \"done\" transitions.\nThis means we need at least three test cases to achieve transition coverage.\n\nFor example:\nTC1: test, done\nTC2: run, error, done\nTC3: run, pause, resume, pause, done\n\nWhy not the others?\n\na), b), c) — Either too few or more than necessary.","tables":["<div style=\"text-align:center;margin:8px 0;\"><svg viewBox=\"0 0 600 290\" xmlns=\"http://www.w3.org/2000/svg\" style=\"display:inline-block;max-width:100%;height:auto;\"><defs><marker id=\"arr\" viewBox=\"0 0 10 10\" refX=\"9\" refY=\"5\" markerWidth=\"7\" markerHeight=\"7\" orient=\"auto\"><path d=\"M0,0 L10,5 L0,10 z\" fill=\"#333\"/></marker></defs><style>.st{fill:#fff;stroke:#333;stroke-width:1.5;}.lbl{font-family:Nunito,sans-serif;font-size:14px;fill:#111;text-anchor:middle;font-weight:800;}.edge{stroke:#333;stroke-width:1.5;fill:none;}.edgelbl{font-family:Nunito Sans,sans-serif;font-size:13px;fill:#111;text-anchor:middle;}</style><rect class=\"st\" x=\"40\" y=\"115\" width=\"120\" height=\"50\" rx=\"4\"/><text class=\"lbl\" x=\"100\" y=\"146\">INIT</text><rect class=\"st\" x=\"230\" y=\"40\" width=\"120\" height=\"50\" rx=\"4\"/><text class=\"lbl\" x=\"290\" y=\"62\">DEBUG</text><text class=\"lbl\" x=\"290\" y=\"80\">MODE</text><rect class=\"st\" x=\"230\" y=\"190\" width=\"120\" height=\"50\" rx=\"4\"/><text class=\"lbl\" x=\"290\" y=\"212\">IN</text><text class=\"lbl\" x=\"290\" y=\"230\">OPERATION</text><rect class=\"st\" x=\"440\" y=\"190\" width=\"120\" height=\"50\" rx=\"4\"/><text class=\"lbl\" x=\"500\" y=\"222\">ON HOLD</text><rect class=\"st\" x=\"440\" y=\"40\" width=\"120\" height=\"50\" rx=\"4\"/><text class=\"lbl\" x=\"500\" y=\"72\">OFF</text><path class=\"edge\" d=\"M100,115 L100,75 L230,75\" marker-end=\"url(#arr)\"/><text class=\"edgelbl\" x=\"165\" y=\"67\">test</text><path class=\"edge\" d=\"M100,165 L100,205 L230,205\" marker-end=\"url(#arr)\"/><text class=\"edgelbl\" x=\"165\" y=\"222\">run</text><path class=\"edge\" d=\"M350,65 L440,65\" marker-end=\"url(#arr)\"/><text class=\"edgelbl\" x=\"395\" y=\"55\">done</text><path class=\"edge\" d=\"M290,190 L290,90\" marker-end=\"url(#arr)\"/><text class=\"edgelbl\" x=\"315\" y=\"143\">error</text><path class=\"edge\" d=\"M350,210 L440,210\" marker-end=\"url(#arr)\"/><text class=\"edgelbl\" x=\"395\" y=\"203\">pause</text><path class=\"edge\" d=\"M440,225 L350,225\" marker-end=\"url(#arr)\"/><text class=\"edgelbl\" x=\"395\" y=\"245\">resume</text><path class=\"edge\" d=\"M500,190 L500,90\" marker-end=\"url(#arr)\"/><text class=\"edgelbl\" x=\"525\" y=\"143\">done</text></svg></div>"]},{"q":"Your test suite achieved 100% statement coverage. What is the consequence of this fact?","opts":["Each instruction in the code that contains a defect has been executed at least once","Any test suite containing more test cases than your test suite will also achieve 100% statement coverage","Each path in the code has been executed at least once","Every combination of input values has been tested at least once"],"correct":[0],"exp_count":1,"exp":"a) is correct. Since 100% statement coverage is achieved, every statement, including the ones with defects, must have been executed and evaluated at least once.\n\nWhy not the others?\n\nb) Coverage depends on what is tested, not on the number of test cases. For example, for code \"if (x==0) y=1\", one test case (x=0) achieves 100% statement coverage, but two test cases (x=1) and (x=2) together achieve only 50% statement coverage.\nc) If there is a loop in the code there may be an infinite number of possible paths, so it is not possible to execute all the possible paths in the code.\nd) Exhaustive testing is not possible (see the seven testing principles section in the syllabus). For example, for code \"input x; print x\" any single test with arbitrary x achieves 100% statement coverage, but covers one input value."},{"q":"Which of the following is NOT true for white-box testing?","opts":["During white-box testing the entire software implementation is considered","White-box coverage metrics can help identify additional tests to increase code coverage","White-box test techniques can be used in static testing","White-box testing can help identify gaps in requirements implementation"],"correct":[3],"exp_count":1,"exp":"d) is correct. This is the weakness of the white-box test techniques. They are not able to identify the missing implementation, because they are based solely on the test object structure, not on the requirements specification.\n\nWhy not the others?\n\na) The fundamental strength of white-box test techniques is that the entire software implementation is taken into account during testing.\nb) White-box coverage measures provide an objective measure of coverage and provide the necessary information to allow additional tests to be generated to increase this coverage.\nc) White-box test techniques can be used to perform reviews (static testing)."},{"q":"Which of the following BEST describes the concept behind error guessing?","opts":["Error guessing involves using your knowledge and experience of defects found in the past and typical errors made by developers","Error guessing involves using your personal experience of development and the errors you made as a developer","Error guessing requires you to imagine that you are the user of the test object and to guess errors the user could make interacting with it","Error guessing requires you to rapidly duplicate the development task to identify the sort of errors a developer might make"],"correct":[0],"exp_count":1,"exp":"a) is correct. The basic concept behind error guessing is that the tester tries to guess what errors may have been made by the developer and what defects may be in the test object based on past experience (and sometimes checklists).\n\nWhy not the others?\n\nb) Although a tester who used to be a developer may use their personal experience to help them when performing error guessing, the test technique is not based on prior knowledge of development.\nc) Error guessing is not a usability technique for guessing how users may fail to interact with the test object.\nd) Duplicating the development task has several flaws that make it impractical, such as the tester having equivalent skills to the developer and the time involved to perform the development. It is not error guessing."},{"q":"In your project there has been a delay in the release of a brand-new application and test execution started late, but you have very detailed domain knowledge and good analytical skills. The full list of requirements has not yet been shared with the team, but management is asking for some test results to be presented. Which test technique fits BEST in this situation?","opts":["Checklist-based testing","Error guessing","Exploratory testing","Branch testing"],"correct":[2],"exp_count":1,"exp":"c) Exploratory testing is correct. Exploratory testing is most useful when there are few known specifications and/or there is a pressing timeline for testing.\n\nWhy not the others?\n\na) This is a new product. You probably do not have a checklist yet and test conditions might not be known due to missing requirements.\nb) This is a new product. You probably do not have enough information to make correct error guesses.\nd) Branch testing is time-consuming, and your management is asking about some test results now. Also, branch testing does not involve domain knowledge."},{"q":"Which of the following BEST describes the way acceptance criteria can be documented?","opts":["Performing retrospectives to determine the actual needs of the stakeholders regarding a given user story","Using the given/when/then format to describe an example test condition related to a given user story","Using verbal communication to reduce the risk of misunderstanding the acceptance criteria by others","Documenting risks related to a given user story in a test plan to facilitate the risk-based testing of a given user story"],"correct":[1],"exp_count":1,"exp":"b) is correct. This is the standard way to document acceptance criteria.\n\nWhy not the others?\n\na) Retrospectives are used to capture lessons learned and to improve the development and testing process, not to document the acceptance criteria.\nc) Verbal communication does not allow to physically document the acceptance criteria as part of a user story (\"card\" aspect in the 3C's model).\nd) Acceptance criteria are related to a user story, not a test plan. Also, acceptance criteria are the conditions that have to be fulfilled to decide if the user story is complete. Risks are not such conditions."},{"q":"Consider the following user story: As an Editor I want to review content before it is published so that I can ensure the grammar is correct and its acceptance criteria:\n\n• The user can log in to the content management system with \"Editor\" role\n• The editor can view existing content pages\n• The editor can edit the page content\n• The editor can add markup comments\n• The editor can save changes\n• The editor can reassign to the \"content owner\" role to make updates\n\nWhich of the following is the BEST example of an ATDD test for this user story?","opts":["Test if the editor can save the document after edit the page content","Test if the content owner can log in and make updates to the content","Test if the editor can schedule the edited content for publication","Test if the editor can reassign to another editor to make updates"],"correct":[0],"exp_count":1,"exp":"a) is correct. This test covers two acceptance criteria: one about editing the document and one about saving changes.\n\nWhy not the others?\n\nb) Acceptance criteria cover the editor activities, not the content owner activities.\nc) Scheduling the edited content for publication may be a nice feature, but it is not covered by the acceptance criteria.\nd) Acceptance criteria state about reassigning from an editor to the content owner, not to another editor."},{"q":"How do testers add value to iteration and release planning?","opts":["Testers determine the priority of the user stories to be developed","Testers focus only on the functional aspects of the system to be tested","Testers participate in the detailed risk identification and risk assessment of user stories","Testers guarantee the release of high-quality software through early test design during the release planning"],"correct":[2],"exp_count":1,"exp":"c) is correct. According to the syllabus, this is one of the ways testers add value to iteration and release planning.\n\nWhy not the others?\n\na) Priorities for user stories are determined by the business representative together with the development team.\nb) Testers focus on both functional and non-functional aspects of the system to be tested.\nd) Early test design is not part of release planning. Early test design does not automatically guarantee the release of quality software."},{"q":"Which TWO of the following options are the exit criteria for testing a system?","opts":["Test environment readiness","The ability to log in to the test object by the tester","Estimated defect density is reached","Requirements are translated into given/when/then format","Regression tests are automated"],"correct":[2,4],"exp_count":2,"exp":"c) and e) are correct.\n\nc) Estimated defect density is a measure of diligence; hence it belongs to the exit criteria.\ne) Automation of regression tests is a completion criterion; hence it belongs to the exit criteria.\n\nWhy not the others?\n\na) Test environment readiness is a resource availability criterion; hence it belongs to the entry criteria.\nb) This is a resource availability criterion; hence it belongs to the entry criteria.\nd) Requirements translated into a given format result in testable requirements; hence it belongs to the entry criteria."},{"q":"Your team uses the three-point estimation technique to estimate the test effort for a new high-risk feature. The following estimates were made:\n\n• Most optimistic estimation: 2 person-hours\n• Most likely estimation: 11 person-hours\n• Most pessimistic estimation: 14 person-hours\n\nWhat is the final estimate?","opts":["9 person-hours","14 person-hours","11 person-hours","10 person-hours"],"correct":[3],"exp_count":1,"exp":"d) 10 person-hours is correct.\n\nIn the three-point estimation technique:\nE = (optimistic + 4 * most likely + pessimistic) / 6\nE = (2 + (4 * 11) + 14) / 6 = 10\n\nWhy not the others?\n\na), b), c) — Incorrect calculations."},{"q":"You are testing a mobile application that allows users to find a nearby restaurant based on the type of food they want to eat. Consider the following list of test cases, priorities (i.e., a smaller number means a higher priority), and dependencies:\n\n{{table:0}}\n\nWhich of the following test cases should be executed as the third one?","opts":["TC 003","TC 005","TC 002","TC 001"],"correct":[0],"exp_count":1,"exp":"a) TC 003 is correct.\n\nTest TC 001 must come first, followed by TC 002, to satisfy dependencies.\nAfterwards, TC 003 to satisfy priority and then TC 004, followed by TC 005.\n\nWhy not the others?\n\nb), c), d) — Do not respect dependencies and priority ordering.","tables":["<div class=\"q-table-wrap\"><table class=\"q-table\"><thead><tr><th>TC#</th><th>Test condition covered</th><th>Priority</th><th>Logical dependency</th></tr></thead><tbody><tr><td>TC 001</td><td>Select type of food</td><td>3</td><td>None</td></tr><tr><td>TC 002</td><td>Select restaurant</td><td>2</td><td>TC 001</td></tr><tr><td>TC 003</td><td>Get directions</td><td>1</td><td>TC 002</td></tr><tr><td>TC 004</td><td>Call restaurant</td><td>2</td><td>TC 002</td></tr><tr><td>TC 005</td><td>Make reservation</td><td>3</td><td>TC 002</td></tr></tbody></table></div>"]},{"q":"Consider the following test categories (1-4) and agile testing quadrants (A-D):\n\n1. Usability testing\n2. Component testing\n3. Functional testing\n4. Reliability testing\n\nA. Agile testing quadrant Q1: technology facing, supporting the development team\nB. Agile testing quadrant Q2: business facing, supporting the development team\nC. Agile testing quadrant Q3: business facing, critique the product\nD. Agile testing quadrant Q4: technology facing, critique the product\n\nHow do the following test categories map onto the agile testing quadrants?","opts":["1C, 2A, 3B, 4D","1D, 2A, 3C, 4B","1C, 2B, 3D, 4A","1D, 2B, 3C, 4A"],"correct":[0],"exp_count":1,"exp":"a) 1C, 2A, 3B, 4D is correct.\n\n• Usability testing is in Q3 (1 – C)\n• Component testing is in Q1 (2 – A)\n• Functional testing is in Q2 (3 – B)\n• Reliability testing is in Q4 (4 – D)\n\nWhy not the others?\n\nb), c), d) — Have one or more incorrect mappings."},{"q":"During a risk analysis the following risk was identified and assessed:\n\n• Risk: Response time is too long to generate a report\n• Risk likelihood: medium; risk impact: high\n• Response to risk:\n ◦ An independent test team performs performance efficiency testing during system testing\n ◦ A selected sample of end users performs alpha testing and beta testing before the release\n\nWhat measure is proposed to be taken in response to this analyzed risk?","opts":["Risk acceptance","Contingency plan","Risk mitigation","Risk transfer"],"correct":[2],"exp_count":1,"exp":"c) Risk mitigation is correct. The proposed actions are related to testing, which is a form of risk mitigation.\n\nWhy not the others?\n\na) We do not accept the risk; concrete actions are proposed.\nb) No contingency plans are proposed.\nd) Risk is not transferred but mitigated."},{"q":"Which work product can be used by an agile team to show the amount of work that has been completed and the amount of total work remaining for a given iteration?","opts":["Acceptance criteria","Defect report","Test completion report","Burndown chart"],"correct":[3],"exp_count":1,"exp":"d) Burndown chart is correct. Burndown charts are a graphical representation of work left to do versus time remaining. They are updated daily, so they can continuously show the work progress.\n\nWhy not the others?\n\na) Acceptance criteria are the conditions used to decide whether the user story is ready. They cannot show work progress.\nb) Defect reports inform about the defects. They do not show work progress.\nc) Test completion report can be created after the iteration is finished, so it will not show the progress continuously within an iteration."},{"q":"You need to update one of the automated test scripts to be in line with a new requirement. Which process indicates that you create a new version of the test script in the test repository?","opts":["Traceability management","Maintenance testing","Configuration management","Requirements engineering"],"correct":[2],"exp_count":1,"exp":"c) Configuration management is correct. To support testing, configuration management may involve the version control of all test items.\n\nWhy not the others?\n\na) Traceability is the relationship between two or more work products, not between different versions of the same work product.\nb) Maintenance testing is about testing changes; it is not related closely to versioning.\nd) Requirements engineering is the elicitation, documentation, and management of requirements; it is not closely related to test script versioning."},{"q":"You received the following defect report from the developers stating that the anomaly described in this test report is not reproducible.\n\n<em>Application hangs up</em>\n\n<em>2022-May-03 – John Doe – Rejected</em>\n\n<em>The application hangs up after entering \"Test input: $ä\" in the Name field on the new user creation screen. Tried to log off, log in with test_admin01 account, same issue. Tried with other test admin accounts, same issue. No error message received; log (see attached) contains fatal error notification. Based on the test case TC-1305, the application should accept the provided input and create the user. Please fix with high priority, this feature is related to REQ-0012, which is a critical new business requirement.</em>\n\nWhat critical information is MISSING from this test report that would have been useful for the developers?","opts":["Expected result and actual result","References and defect status","Test environment and test item","Priority and severity"],"correct":[2],"exp_count":1,"exp":"c) Test environment and test item is correct. We do not know in which test environment the anomaly was detected, and we also do not know which application (and its version) is affected.\n\nWhy not the others?\n\na) The expected result is \"the application should accept the provided input and create the user\". The actual result is \"The application hangs up after entering \"Test input. $ä\"\".\nb) There is a reference to the test case and to the related requirement and it states that the defect is rejected. Also, the defect status would not be very helpful for the developers.\nd) The defect report states that the anomaly is urgent, that it is a global issue (i.e., many, if not all, test administration accounts are affected) and states the impact is high for business stakeholders."},{"q":"Which test activity does a data preparation tool support?","opts":["Test monitoring and test control","Test analysis","Test design and test implementation","Test completion"],"correct":[2],"exp_count":1,"exp":"c) Test design and test implementation is correct. Test design and implementation can both include the identification, creation or acquisition of the testware necessary for test execution (e.g., test data).\n\nWhy not the others?\n\na) Test monitoring involves the ongoing checking of all activities and comparison of actual progress against the test plan. Test control involves taking the actions necessary to meet the test objectives of the test plan. No test data are prepared during these activities.\nb) Test analysis includes analysis of the test basis to identify test conditions and prioritize them. Test data are not prepared during this activity.\nd) Test completion activities occur at project milestones (e.g., release, end of iteration, test level completion), so it is too late for preparing test data."},{"q":"Which item correctly identifies a potential risk of performing test automation?","opts":["It may introduce unknown regressions in production","Sufficient efforts to maintain testware may not be properly allocated","Testing tools and associated testware may not be sufficiently relied upon","It may reduce the time allocated for manual testing"],"correct":[1],"exp_count":1,"exp":"b) is correct. Wrong allocation of effort to maintain testware is a risk.\n\nWhy not the others?\n\na) Test automation does not introduce unknown regressions in production.\nc) Test tools must be selected so that they and their testware can be relied upon.\nd) The primary goal of test automation is to reduce manual testing. So, this is a benefit, not a risk."}]};
const GLOSSARY=[{"term":"Cost of quality","definition":"The total costs incurred on quality activities and issues and often split into prevention costs, appraisal costs, internal failure costs and external failure costs"},{"term":"Coverage","definition":"The degree to which specified coverage items are exercised by a test suite, expressed as a percentage"},{"term":"Debugging","definition":"The process of finding, analysing and removing the causes of failures in a component or system."},{"term":"Defect","definition":"An imperfection or deficiency in a work product where it does not meet its requirements or specifications"},{"term":"Dynamic testing","definition":"Testing that involves the execution of the test item"},{"term":"Error","definition":"A human action that produces an incorrect result"},{"term":"Failure","definition":"An event in which a component or system does not perform a required function within specified limits"},{"term":"QA","definition":"Activities focused on providing confidence that quality requirements will be fulfilled"},{"term":"QC","definition":"Activities designed to evaluate the quality of a component or system"},{"term":"Quality","definition":"The degree to which a work product satisfies stated and implied needs of its stakeholders"},{"term":"Quality management","definition":"The process of establishing and directing a quality policy, quality objectives, quality planning, quality control, quality assurance, and quality improvement for an organisation"},{"term":"Root cause","definition":"A source of a defect such that if it is removed, the occurrence of the defect type is decreased or removed"},{"term":"Static testing","definition":"Testing that does not involve the execution of a test item"},{"term":"Test analysis","definition":"The activity that identifies test conditions by analysing the test basis"},{"term":"Test basis","definition":"The body of knowledge used as the basis for test analysis and design"},{"term":"Test case","definition":"A set of preconditions, inputs, actions (where applicable), expected results and postconditions, developed based on test conditions"},{"term":"Test completion","definition":"The activity that makes testware available for later use, leaves test environments in a satisfactory condition and communicates the results of testing to relevant stakeholders"},{"term":"Test condition","definition":"A testable aspect of a component or system identified as a basis for testing"},{"term":"Test control","definition":"The activity that develops and applies corrective actions to get a test project on track when it deviates from what was planned"},{"term":"Test data","definition":"Data needed for test execution"},{"term":"Test design","definition":"The activity that derives and specifies test cases from test conditions"},{"term":"Test environment","definition":"An environment containing hardware, instrumentation, simulators, software tools, and other support elements needed to conduct a test"},{"term":"Test execution","definition":"The activity that runs a test on a component or system producing actual results"},{"term":"Test implementation","definition":"The activity that prepares the testware needed for test execution based on test analysis and design"},{"term":"Test manager","definition":"The person responsible for project management of testing activities, resources, and evaluation of a test object"},{"term":"Test monitoring","definition":"The activity that checks the status of testing activities, identifies any variances from planned or expected, and reports status to stakeholders"},{"term":"Test object","definition":"The work product to be tested"},{"term":"Test item","definition":"A part of a test object used in the test process"},{"term":"Test objective","definition":"The purpose for testing"},{"term":"Test oracle","definition":"A source to determine an expected result to compare with the actual result of the system under test"},{"term":"Test planning","definition":"The activity of establishing or updating a test plan"},{"term":"Test procedure","definition":"A sequence of test cases in execution order, and any associated actions that may be required to set up the initial preconditions and any wrap up activities post execution"},{"term":"Test process","definition":"The set of interrelated activities comprising of test planning, test monitoring and control, test analysis, test design, test implementation, test execution, and test completion"},{"term":"Test result","definition":"The consequence/outcome of the execution of a test"},{"term":"Test run","definition":"The execution of a test suite on a specific version of the test object"},{"term":"Test suite","definition":"A set of test scripts or test procedures to be executed in a specific test run"},{"term":"Tester","definition":"A person who performs testing"},{"term":"Testing","definition":"The process within the software development lifecycle that evaluates the quality of a component or system and related work products"},{"term":"Testware","definition":"Work products produced during the test process for use in planning, designing, executing, evaluating and reporting on testing"},{"term":"Traceability","definition":"The ability to establish explicit relationships between related work products or items within work products"},{"term":"Validation","definition":"Confirmation by examination that a work product matches a stakeholder's needs"},{"term":"Verification","definition":"Confirmation by examination and through provision of objective evidence that specified requirements have been fulfilled"},{"term":"Alpha testing","definition":"A type of acceptance testing performed in the developer's test environment by roles outside the development organisation"},{"term":"Acceptance testing","definition":"A test level that focuses on determining whether to accept the system"},{"term":"Agile software development","definition":"A group of software development methodologies based on iterative incremental development, where requirements and solutions evolve through collaboration between self-organising cross-functional teams"},{"term":"ATDD","definition":"A collaboration-based test-first approach that defines acceptance tests in the stakeholders' domain language"},{"term":"BDD","definition":"A collaborative approach to development in which the team is focusing on delivering expected behaviour of a component or system for the customer, which forms the basis for testing"},{"term":"beta testing","definition":"A type of acceptance testing performed at an external site to the developer's test environment by roles outside the development organisation"},{"term":"Black-box testing","definition":"Testing based on an analysis of the specification of the component or system"},{"term":"CAT","definition":"A type of acceptance testing performed to verify whether a system satisfies its contractual requirements"},{"term":"Change-related testing","definition":"A type of testing initiated by modification to a component or system"},{"term":"Component integration testing","definition":"The integration testing of components"},{"term":"Component testing","definition":"A test level that focuses on individual hardware or software components"},{"term":"Confirmation testing","definition":"A type of change-related testing performed after fixing a defect to confirm that a failure caused by that defect does not reoccur."},{"term":"CD","definition":"A software engineering approach in which teams produce software in short cycles, ensuring that the software can be reliably released at any time and, following a pipeline through a \"production-like environment\", without doing so manually"},{"term":"CI","definition":"An automated software development procedure that merges, integrates and tests all changes as soon as they are committed"},{"term":"COTS","definition":"A type of product developed in an identical format for a large number of customers in the general market"},{"term":"DDD","definition":"A software development approach that focuses on building software systems that closely align with the business domain they serve"},{"term":"DevOps","definition":"An organisational approach aiming to create synergy by getting development (including testing) and operations to work together to achieve a set of common goals"},{"term":"Driver","definition":"A component or tool that temporarily replaces another component and controls or calls a test item in isolation"},{"term":"FDD","definition":"An iterative and incremental software development process driven from a client-valued functionality (feature) perspective. Feature-driven development is mostly used in Agile software development"},{"term":"FN result","definition":"A test result which fails to identify a defect that is actually present in a test object."},{"term":"FP result","definition":"A test result in which a defect is reported although no such defect actually exists in the test object"},{"term":"Functional testing","definition":"Testing performed to evaluate if a component or system satisfies functional requirements (ISO 24765)"},{"term":"Impact analysis","definition":"The identification of all work products affected by a change, including an estimate of the resources needed to accomplish the change (ISO 24765)"},{"term":"Incremental development model","definition":"A type of software development lifecycle model in which the component or system is developed through a series of increments"},{"term":"Integration testing","definition":"A test level that focuses on interactions between components or systems"},{"term":"Iterative development model","definition":"A type of software development lifecycle model in which the component or system is developed through a series of repeated cycles"},{"term":"Kanban","definition":"A visual management method and agile project management framework that aims to optimise workflow, improve efficiency, and increase transparency in software development and various other industries, originally inspired by the Toyota production system"},{"term":"Lean IT","definition":"A systematic approach to identifying and eliminating waste in processes to deliver more value to customers with fewer resources"},{"term":"Maintenance testing","definition":"Testing the changes to an operational system or the impact of a changed environment to an operational system"},{"term":"Non-functional testing","definition":"Testing performed to evaluate that a component or system complies with non-functional requirements"},{"term":"OAT","definition":"A type of acceptance testing performed to determine if operations and/or systems administration staff can accept a system"},{"term":"RAT","definition":"A type of acceptance testing performed to determine the compliance of the test object"},{"term":"Regression testing","definition":"A type of change-related testing to detect whether defects have been introduced or uncovered in unchanged areas of the software"},{"term":"Retrospective","definition":"A regular event in which team members discuss results, review their practices, and identify ways to improve"},{"term":"Scrum","definition":"An agile framework for managing and organising software development projects providing a flexible and iterative approach to project management, emphasising collaboration, self-organisation, and continuous improvement"},{"term":"SDLC","definition":"The activities performed at each stage in software development, and how they relate to one another logically and chronologically"},{"term":"Sequential development model","definition":"A type of software development lifecycle model in which a complete system is developed in a linear way of several discrete and successive phases with no overlap between them"},{"term":"Shift-left","definition":"An approach to performing testing and quality assurance activities as early as possible in the software development lifecycle"},{"term":"Sprint","definition":"A short, time-boxed period when a scrum team works to complete a set amount of work of typically lasting two to four weeks"},{"term":"Stub","definition":"A skeletal or special-purpose implementation of a software component, used to develop or test a component that calls or is otherwise dependent on it. It replaces a called component"},{"term":"System integration testing","definition":"The integration testing of systems"},{"term":"System testing","definition":"A test level that focuses on verifying that a system as a whole meets specified requirements"},{"term":"TDD","definition":"A software development technique in which the test cases are developed, automated and then the software is developed incrementally to pass those test cases"},{"term":"Test level","definition":"A specific instantiation of a test process"},{"term":"Test type","definition":"A group of test activities based on specific test objectives aimed at specific characteristics of a component or system"},{"term":"UAT","definition":"A type of acceptance testing performed to determine if intended users accept the system"},{"term":"V-model","definition":"A sequential software development lifecycle model describing a one-for-one relationship between major phases of software development from business requirements specification to delivery, and corresponding test levels from acceptance testing to component testing"},{"term":"White-box testing","definition":"Testing based on an analysis of the internal structure of the component or system"},{"term":"XP","definition":"An Agile software development methodology intended to improve software quality and responsiveness to changing customer requirements"},{"term":"Anomaly","definition":"A condition that deviates from expectation (ISO 24765)"},{"term":"Dynamic testing","definition":"Testing that involves the execution of the test item (ISO 29119-1)"},{"term":"Epic","definition":"A large user story that cannot be delivered as defined within a single iteration or is large enough that it can be split into smaller user stories."},{"term":"Formal review","definition":"A review that follows a defined process with a formally documented output (ISO 20246)"},{"term":"Informal review","definition":"A type of review that does not follow a defined process and has no formally documented output."},{"term":"Inspection","definition":"A type of formal review that uses defined team roles and measurement to identify defects in a work product, and improve the review process and the software development process (ISO 20246)"},{"term":"Moderator","definition":"The person responsible for running review meetings"},{"term":"Requirement specification","definition":"A formal document that defines the requirements, objectives and expectations for a system, product, or service"},{"term":"Review","definition":"A type of static testing in which the quality of a work product or process is evaluated by individuals"},{"term":"Reviewer","definition":"A participant in a review who identifies defects in the work product (ISO 20246)"},{"term":"Scribe","definition":"A person who records information at a review meeting"},{"term":"Static analysis","definition":"The process of evaluating a component or system without executing it, based on its form, structure, content, or documentation (ISO 24765)"},{"term":"Static testing","definition":"Testing that does not involve the execution of a test item"},{"term":"Technical review","definition":"A formal review by technical experts that examine the quality of a work product and identify discrepancies from specifications and standards (IEEE 1028)"},{"term":"User story","definition":"A user or business requirement consisting of one sentence expressed in the everyday or business language which is capturing the functionality a user needs, the reason behind it, any non-functional criteria, and also including acceptance criteria"},{"term":"Walkthrough","definition":"A type of review in which an author leads members of the review through a work product and the members ask questions and make comments about possible issues (ISO 20246)"},{"term":"Acceptance criteria","definition":"The criteria that a work product must satisfy to be accepted by the stakeholders"},{"term":"Black-box test technique","definition":"A test technique based on the specification of a component or system"},{"term":"Boundary value","definition":"A minimum or maximum value of an ordered equivalence partition"},{"term":"Branch","definition":"A transfer of control between two consecutive nodes in the control flow graph of a test item"},{"term":"Branch coverage","definition":"The coverage of branches in a control flow graph"},{"term":"Branch testing","definition":"A white-box test technique in which the test conditions are branches"},{"term":"BVA","definition":"A black-box test technique in which test cases are designed based on boundary values"},{"term":"Checklist-based testing","definition":"An experience-based test technique in which test cases are designed to exercise the items of a checklist"},{"term":"Collaboration-based test approach","definition":"An approach to testing that focuses on defect avoidance by collaborating among stakeholders"},{"term":"Coverage","definition":"The degree to which specified coverage items are exercised by a test suite, expressed as a percentage."},{"term":"Coverage item","definition":"An attribute or combination of attributes derived from one or more test conditions by using a test technique"},{"term":"Decision table testing","definition":"A black-box test technique in which test cases are designed to exercise the combinations of conditions and the resulting actions shown in a decision table"},{"term":"Each Choice coverage","definition":"It is required that test cases exercise each partition from each set of partitions at least once"},{"term":"EP","definition":"A black-box test technique in which test conditions are equivalence partitions exercised by one representative member of each partition (ISO 29119-1)"},{"term":"Equivalence partition","definition":"A subset of the value domain of a variable within a component or system in which all values are expected to be treated the same based on the specification"},{"term":"Error guessing","definition":"A test technique in which tests are derived on the basis of the tester's knowledge of past failures, or general knowledge of failure modes (ISO 29119-1)"},{"term":"Experience-based test technique","definition":"A test technique based on the tester's experience, knowledge and intuition"},{"term":"Exploratory testing","definition":"An approach to testing in which the testers dynamically design and execute tests based on their knowledge, exploration of the test item and the results of previous tests (ISO 29119-1)"},{"term":"Fault attack","definition":"A test technique to evaluate a specific quality characteristic of a test object by attempting to trigger specific failures"},{"term":"Invalid partition","definition":"A partition containing invalid values"},{"term":"Session-based testing","definition":"A test approach in which test activities are planned as test sessions"},{"term":"State transition testing","definition":"A black-box test technique in which test cases are designed to exercise elements of a state transition model (ISO 29119-4)"},{"term":"Statement","definition":"An entity in a programming language, which is typically the smallest indivisible unit of execution"},{"term":"Statement coverage","definition":"The coverage of executable statements"},{"term":"Statement testing","definition":"A white-box test technique in which test cases are designed to execute statements"},{"term":"Test charter","definition":"Documentation of the goal or objective for a test session"},{"term":"Test session","definition":"An uninterrupted period of time spent in executing tests"},{"term":"Test technique","definition":"A procedure used to define test conditions, design test cases, and specify test data"},{"term":"User story","definition":"A user or business requirement consisting of one sentence expressed in the everyday or business language which is capturing the functionality a user needs, the reason behind it, any non-functional criteria, and also including acceptance criteria"},{"term":"Valid partition","definition":"A partition containing valid values"},{"term":"White-box test technique","definition":"A test technique based on the internal structure of a component or system"},{"term":"Actual result","definition":"The behaviour produced/observed when a component or system is tested"},{"term":"CM","definition":"A discipline applying technical and administrative direction and surveillance to identify and document the functional and physical characteristics of a configuration item, control changes to those characteristics, record and report change processing and implementation status, and verify that it complies with specified requirements"},{"term":"Defect management","definition":"The process of recognising, recording, classifying, investigating, fixing and disposing of defects"},{"term":"Defect report","definition":"Documentation of the occurrence, nature, and status of a defect"},{"term":"Entry criteria","definition":"The set of conditions for officially starting a defined task"},{"term":"Exit criteria","definition":"The set of conditions for officially completing a defined task"},{"term":"Expected result","definition":"The observable predicted behaviour of a test item under specified conditions based on its test basis (ISO 29119-1)"},{"term":"Planning poker","definition":"A consensus-based estimation technique, mostly used to estimate effort or relative size of user stories in Agile software development. It is a variation of the Wideband Delphi method using a deck of cards with values representing the units in which the team estimates"},{"term":"Priority","definition":"The level of (business) importance assigned to an item, e.g., defect"},{"term":"Product risk","definition":"A risk that impacts the quality of a product"},{"term":"Project risk","definition":"A risk that impacts project success"},{"term":"Risk","definition":"A factor that could result in future negative consequences"},{"term":"Risk analysis","definition":"The overall process of risk identification and risk assessment"},{"term":"Risk assessment","definition":"The process to examine identified risks and determine the risk level"},{"term":"Risk control","definition":"The overall process of risk mitigation and risk monitoring"},{"term":"Risk identification","definition":"The process of finding, recognising and describing risks (ISO 31000)"},{"term":"Risk impact","definition":"The damage that will be caused if a risk becomes an actual outcome or event"},{"term":"Risk level","definition":"The measure of a risk defined by risk impact and risk likelihood"},{"term":"Risk likelihood","definition":"The probability that a risk will become an actual outcome or event"},{"term":"Risk management","definition":"The process for handling risks (ISO 24765)"},{"term":"Risk matrix","definition":"A matrix that is used during risk assessment to define the level of risk by considering the category of likelihood against the category of impact"},{"term":"Risk mitigation","definition":"The process through which decisions are reached and protective measures are implemented for reducing or maintaining risks to specified levels"},{"term":"Risk monitoring","definition":"The activity that checks and reports the status of known risks to stakeholders"},{"term":"Risk-based testing","definition":"A test approach in which the management, selection, prioritisation, and use of test activities and resources are based on corresponding risk types and risk levels (ISO 29119-1)"},{"term":"Severity","definition":"The degree of impact that a defect has on the development or operation of a component or system"},{"term":"Smoke test","definition":"A test suite that covers the main functionality of a component or system to determine whether it works properly before planned testing begins"},{"term":"Test approach","definition":"The manner of implementing testing tasks"},{"term":"Test completion","definition":"The activity that makes testware available for later use, leaves test environments in a satisfactory condition and communicates the results of testing to relevant stakeholders"},{"term":"Test completion report","definition":"A type of test report produced at completion milestones that provides an evaluation of the corresponding test items against exit criteria"},{"term":"Test control","definition":"The activity that develops and applies corrective actions to get a test project on track when it deviates from what was planned"},{"term":"Test estimation","definition":"An approximation related to various aspects of testing"},{"term":"Test monitoring","definition":"The activity that checks the status of testing activities, identifies any variances from planned or expected, and reports status to stakeholders"},{"term":"Test plan","definition":"Documentation describing the test objectives to be achieved and the means and the schedule for achieving them, organised to coordinate testing activities (ISO 29119-1)"},{"term":"Test planning","definition":"The activity of establishing or updating a test plan"},{"term":"Test progress report","definition":"A type of periodic test report that includes the progress of test activities against a baseline, risks, and alternatives requiring a decision"},{"term":"Test pyramid","definition":"A graphical model representing the relationship of the amount of testing per level, with more at the bottom than at the top"},{"term":"Test reporting","definition":"Collecting and analysing data from testing activities and subsequently consolidating the data in a report to inform stakeholders"},{"term":"Test scope","definition":"A description of the test object and its features to be tested"},{"term":"Test strategy","definition":"A description of how to perform testing to reach test objectives under given circumstances"},{"term":"Testing quadrants","definition":"A classification model of test types/test levels in four quadrants, relating them to two dimensions of test objectives: supporting the product team versus critiquing the product, and technology-facing versus business-facing"},{"term":"Wideband delphi","definition":"An expert-based test estimation technique that aims at making an accurate estimation using the collective wisdom of the team members"},{"term":"Capture/replay","definition":"A test automation approach in which inputs to a test object are recorded during manual testing to generate automated test scripts that can be executed later"},{"term":"Data-driven testing","definition":"A scripting technique that uses data files to contain the test data and expected results needed to execute the test scripts"},{"term":"Keyword-driven testing","definition":"A scripting technique in which test scripts contain high-level keywords and supporting files that contain low-level scripts that implement those keywords"},{"term":"Probe effect","definition":"The effect on the component or system by the measurement instrument when the component or system is being measured, e.g., by a performance testing tool or monitor. For example, performance may be slightly worse when performance testing tools are being used"},{"term":"Test automation","definition":"The conversion of test activities to automatic operation"}];
const CTAI_GLOSSARY=[{"term":"AI","definition":"The capability of an engineered system to acquire, process, create and apply knowledge and skills (ISO/IEC TR 29119-11)"},{"term":"AI component","definition":"A component that provides AI functionality"},{"term":"AI development framework","definition":"A set of tools and libraries designed to help developers create artificial intelligence (AI) and machine learning (ML) applications more easily"},{"term":"AI effect","definition":"The situation when a previously labelled AI system is no longer\nconsidered to be AI as technology advances (ISO/IEC TR 29119-11)"},{"term":"AI-based system","definition":"A system that integrates one or more AI components"},{"term":"AI-specific processor","definition":"A type of specialised hardware designed to accelerate AI applications"},{"term":"AIaaS","definition":"A software licensing and delivery model in which AI and AI\ndevelopment services are centrally hosted"},{"term":"API","definition":"A type of interface in which the components or systems involved exchange information in a defined formal structure"},{"term":"ASIC","definition":"A kind of integrated circuit that is specially built for a specific application or purpose"},{"term":"association","definition":"An unsupervised learning technique that identifies relationships and\ndependencies between samples"},{"term":"AWS","definition":"A subsidiary of Amazon that provides on-demand cloud computing platforms and APIs to individuals, companies, and governments, on a metered, pay-as-you-go basis"},{"term":"Bayesian model","definition":"A statistical model that uses probability to represent the uncertainty of both model inputs and outputs"},{"term":"BERT","definition":"A natural language model based on the transformer architecture, notable for its dramatic improvement over previous state of the art models"},{"term":"case-based reasoning","definition":"The technique of solving a new problem based on the solutions of\nsimilar past problems"},{"term":"chatbot","definition":"An application used to conduct a conversation via text or text-to-speech"},{"term":"classification","definition":"A type of ML function that predicts the output class for a given input\n(After ISO/IEC TR 29119-11)"},{"term":"classifier","definition":"An ML model used for classification"},{"term":"clustering","definition":"A type of ML function that groups similar data points together"},{"term":"clustering algorithm","definition":"A type of ML algorithm used to group similar objects into clusters"},{"term":"CNTK","definition":"An open-source Microsoft deep-learning toolkit"},{"term":"CPU","definition":"A general-purpose processor with a small number of cores that are optimised for low-latency tasks that require single-threaded performance, such as running operating systems, web browsing or office applications"},{"term":"dataset","definition":"A collection of data used for training, evaluation, testing and prediction in ML"},{"term":"DBSCAN","definition":"An ML algorithm which operates on data density. The basic concept is to find areas of high density, which are separated from each other by areas of low density"},{"term":"decision tree","definition":"A tree-like ML model whose nodes represent decisions, and whose branches represent possible outcomes"},{"term":"deductive classifier","definition":"A classifier based on the application of inference and logic to input data"},{"term":"DIN","definition":"German Institute for Standardisation"},{"term":"DL","definition":"ML using neural networks with multiple layers"},{"term":"DNN","definition":"A neural network comprised of several layers of neurons"},{"term":"edge computing","definition":"The part of a distributed architecture in which information processing is performed close to where that information is used"},{"term":"EU","definition":"An international organisation comprising 27 European countries and governing common economic, social, and security policies"},{"term":"feature","definition":"An individual measurable attribute of the input data used for training by an ML algorithm and for prediction by an ML model"},{"term":"fuzzy logic","definition":"A type of logic based on the concept of partial truth represented by certainty factors between 0 and 1"},{"term":"GDPR","definition":"The European Union (EU) regulation on data protection and privacy\nthat applies to the data of citizens of the EU and the European\nEconomic Area"},{"term":"general AI","definition":"AI that exhibits intelligent behaviour comparable to a human across the full range of cognitive abilities (ISO/IEC TR 29119-11)"},{"term":"genetic algorithm","definition":"A search-based algorithm used for solving optimisation problems in machine learning"},{"term":"GPTs","definition":"A type of large language model and a prominent framework for generative artificial intelligence. They are artificial neural networks that are used in natural language processing tasks. GPTs are based on the transformer architecture, pre-trained on large data sets of unlabelled text, and able to generate novel human-like content"},{"term":"GPU","definition":"A specialised processor originally designed to handle graphics-related tasks capable of performing thousands of parallel calculations simultaneously and more efficiently than CPUs, thanks to a large number of smaller cores"},{"term":"IBM","definition":"An American multinational technology company headquartered in Armonk, New York"},{"term":"IEC","definition":"An international standards organisation that prepares and publishes international standards for all electrical, electronic and related technologies – collectively known as \"electrotechnology\""},{"term":"IEEE","definition":"An American professional association for electronics engineering, electrical engineering, and other related disciplines"},{"term":"intelligent agent","definition":"An autonomous program which directs its activity towards achieving\ngoals using observations and actions"},{"term":"ISO","definition":"An independent, non-governmental, international standard development organisation composed of representatives from the national standards organisations of member countries"},{"term":"JTC","definition":"Joint Technical Committee"},{"term":"K-means","definition":"An ML algorithm which attempts to cluster the data into a predetermined number of groups by minimising the distance from the cluster centre to the objects"},{"term":"linear regression","definition":"A statistical technique that models the relationship between variables by fitting a linear equation to the observed data when the target variable is numeric"},{"term":"logistic regression","definition":"A statistical technique that models the relationship between variables when the target variable is categorical rather than numeric"},{"term":"ML","definition":"The process using computational techniques to enable systems to learn from data or experience (ISO/IEC TR 29119-11)"},{"term":"ML framework","definition":"A tool or library that supports the creation of an ML model"},{"term":"ML model","definition":"ML output of an ML algorithm trained with a training dataset that generates predictions using patterns in the input data (ISO/IEC TR 29119-11)"},{"term":"ML model training","definition":"The process of applying the ML algorithm to the training dataset to create an ML model"},{"term":"narrow AI","definition":"AI focused on a single well-defined task to address a specific problem (ISO/IEC TR 29119-11)"},{"term":"neuromorphic processor","definition":"An integrated circuit designed to mimic the biological neurons of the human brain"},{"term":"NLP","definition":"A field of computing that provides the ability to read, understand, and\nderive meaning from natural languages"},{"term":"NPU","definition":"A class of specialised hardware accelerator or computer system designed to accelerate artificial intelligence and machine learning applications, including artificial neural networks and machine vision"},{"term":"pre-trained model","definition":"An ML model already trained when it was obtained"},{"term":"procedural reasoning","definition":"AI technology used for constructing real-time reasoning systems that can perform complex tasks in dynamic environments"},{"term":"random forest","definition":"Ensemble ML technology for classification, regression and other tasks that operate by constructing and running many decision trees and then either outputting the mode of the class or the mean prediction of the individual trees"},{"term":"reasoning technique","definition":"AI that generates conclusions from available information using logical\ntechniques (After ISO/IEC TR 29119-11)"},{"term":"reinforcement learning","definition":"The activity of building an ML model using a process of trial and reward to achieve an objective (After ISO/IEC TR 29119-11)"},{"term":"reward function","definition":"A function that defines the success of reinforcement learning"},{"term":"rule engines","definition":"A set of rules that determine which actions should occur when certain\nconditions are satisfied"},{"term":"SaaS","definition":"A software distribution model in which a cloud provider hosts applications and makes them available to end users over the internet"},{"term":"SC","definition":"Sub Committee"},{"term":"scalable ML","definition":"The ability of a machine learning system to handle ever larger amounts of data and computing resources"},{"term":"search algorithm","definition":"An algorithm that systematically visits a subset of all possible states or structures until the goal state or structure is reached (After ISO/IEC TR 29119-11)"},{"term":"SLA","definition":"A contract between a service provider and its customers that documents what services the provider will furnish and defines the service standards the provider is obligated to meet"},{"term":"SoC","definition":"An integrated circuit that integrates most or all components of a computer or other electronic system"},{"term":"super AI","definition":"An artificial intelligence-based system that far exceeds human\ncapabilities"},{"term":"supervised learning","definition":"Training an ML model from input data and its corresponding labels"},{"term":"SVM","definition":"An ML technique in which the data points are viewed as vectors in multi-dimensional space separated by a hyperplane"},{"term":"technological singularity","definition":"A point in the future when technological advances are no longer controllable by people (After ISO/IEC TR 29119-11)"},{"term":"TPU","definition":"A specialised processor developed by Google specifically to perform highly-optimised operations on tensors, which are multidimensional arrays used in deep learning models"},{"term":"transfer learning","definition":"A technique for modifying a pre-trained ML model to perform a different related task"},{"term":"unsupervised learning","definition":"Training an ML model from input data using an unlabelled dataset"},{"term":"VGG","definition":"An academic group focused on computer vision at Oxford University"},{"term":"von Neumann architecture","definition":"A computer architecture which consists of five main components:\nmemory, a central processing unit, a control unit, input and output"},{"term":"VPU","definition":"A specific type of AI accelerator, designed to accelerate machine vision tasks"},{"term":"adaptability","definition":"The ease with which the system can be modified for new situations, such as different hardware and changing operational environments"},{"term":"algorithmic bias","definition":"A type of bias that occurs when the learning algorithm is incorrectly configured, for example, when it overvalues some data compared to others"},{"term":"autonomy","definition":"The ability of a system to work for sustained periods without human intervention (ISO/IEC TR 29119-11)"},{"term":"bias","definition":"1. The systematic difference in treatment of certain objects, people or groups in comparison to others (ISO/IEC DIS 22989)\n\n2. The statistical measure of the distance between the outputs provided by the system and what are considered to be “fair outputs” which show no favouritism to a particular group"},{"term":"ethics","definition":"A system of accepted beliefs that control behaviour, especially such a system based on morals"},{"term":"evolution","definition":"1. The process of continuous change from a lower, simpler, or worse state to a higher, more complex, or better state\n\n2. The ability of the system to improve itself in response to changing external constraints"},{"term":"explainability","definition":"The level of understanding how the AI-based system came up with a given result (ISO/IEC TR 29119-11)"},{"term":"flexibility","definition":"The ability of a system to work in contexts outside its initial specification (After ISO/IEC TR 29119-11)"},{"term":"G20","definition":"An intergovernmental forum comprising 19 countries and the European Union (EU)"},{"term":"inappropriate bias","definition":"A type of bias that causes a system to produce results that lead to adverse effects for a particular group"},{"term":"interpretability","definition":"The level of understanding how the underlying AI technology works (ISO/IEC TR 29119-11)"},{"term":"ML system","definition":"A system that integrates one or more ML models"},{"term":"OECD","definition":"An intergovernmental organisation with 38 member countries, founded in 1961 to stimulate economic progress and world trade"},{"term":"reward hacking","definition":"The activity performed by an intelligent agent to maximise its reward function to the detriment of meeting the original objective (After ISO/IEC TR 29119-11)"},{"term":"safety","definition":"The expectation that a system does not, under defined conditions, lead to a state in which human life, health, property, or the environment is endangered"},{"term":"sample bias","definition":"A type of bias where the dataset is not fully representative of the data space to which ML is applied"},{"term":"self-learning system","definition":"An adaptive system that changes its behaviour based on learning through trial and error (After ISO/IEC TR 29119-11)"},{"term":"side effect","definition":"A secondary and usually adverse effect"},{"term":"transparency","definition":"The level of visibility of the algorithm and data used by the AI-based system (After ISO/IEC TR 29119-11)"},{"term":"UN","definition":"A diplomatic and political international organisation whose stated purposes are to maintain international peace and security, develop friendly relations among nations, achieve international cooperation, and serve as a centre for harmonising the actions of nations"},{"term":"UNESCO","definition":"A specialised agency of the United Nations (UN) aimed at promoting world peace and security through international cooperation in education, arts, sciences and culture"},{"term":"XAI","definition":"The field of study related to understanding the factors that influence AI system outputs"},{"term":"data acquisition","definition":"The activity of acquiring data relevant to the business problem to be solved by an ML model"},{"term":"data pre-processing","definition":"The activities of data cleaning, data transformation, data augmentation, and data sampling in the ML workflow"},{"term":"data preparation","definition":"The activities of data acquisition, data pre-processing and feature engineering in the ML workflow"},{"term":"EDA","definition":"The interactive, hypothesis-driven and visual exploration of data used to support all data preparation activities (data acquisition, data pre-processing and feature engineering)"},{"term":"epoch","definition":"An iteration of ML training on the whole training dataset"},{"term":"feature engineering","definition":"The activity in which those attributes in the raw data that best represent the underlying relationships that should appear in the ML model are identified for use in the training data (ISO/IEC TR 29119-11)"},{"term":"hyperparameter","definition":"A parameter used to either control the training of an ML model or to set the configuration of an ML model"},{"term":"hyperparameter tuning","definition":"The activity of determining the optimal hyperparameters based on particular goals"},{"term":"ML algorithm","definition":"An algorithm used to create an ML model from a training dataset"},{"term":"ML functional performance criteria","definition":"Criteria based on ML functional performance metrics used as a basis for model evaluation, tuning and testing"},{"term":"ML model deployment","definition":"The process of placing a finished ML model into a live environment where it can be used for its intended purpose"},{"term":"ML model evaluation","definition":"The process of comparing achieved ML functional performance metrics with required criteria and those of other ML models"},{"term":"ML model testing","definition":"The process where the performance of a fully trained ML model is evaluated on an independent testing dataset"},{"term":"ML model tuning","definition":"The process of testing hyperparameters to achieve optimum performance"},{"term":"ML workflow","definition":"A sequence of activities used to manage the development and deployment of an ML model"},{"term":"overfitting","definition":"The generation of an ML model that corresponds too closely to the training dataset, resulting in a model that finds it difficult to generalise to new data (After ISO/IEC TR 29119-11)"},{"term":"regression","definition":"A type of ML function that results in a numerical or continuous output value for a given input (After ISO/IEC TR 29119-11)"},{"term":"training dataset","definition":"A dataset used to train an ML model"},{"term":"underfitting","definition":"The generation of an ML model that does not reflect the underlying trend of the training dataset, resulting in a model that finds it difficult to make accurate predictions (ISO/IEC TR 29119-11)"},{"term":"validation dataset","definition":"A dataset used to evaluate a trained ML model with the purpose of tuning the model"},{"term":"annotation","definition":"The activity of identifying objects in images with bounding boxes to provide labelled data for classification"},{"term":"augmentation","definition":"The activity of creating new data points based on an existing dataset"},{"term":"classification model","definition":"An ML model used for classification"},{"term":"data cleaning","definition":"The activity of data pre-processing when incorrect data, duplicate data or outliers are either removed or corrected, data imputation is used, removal or anonymisation of personal information is performed"},{"term":"data gathering","definition":"The activity of data acquisition when the source of the data is identified and the means for collecting the data are determined"},{"term":"data identification","definition":"The activity of data acquisition when the types of data to be used for training and predictions are identified"},{"term":"data imputation","definition":"The activity of data pre-processing when the missing values are replaced with estimated or guessed values"},{"term":"data labelling","definition":"The activity of adding meaningful tags to objects in raw data to support classification in ML"},{"term":"data pipeline","definition":"The implementation of data preparation activities to provide input data to support training by an ML algorithm or prediction by an ML model"},{"term":"data sampling","definition":"The activity of data pre-processing which involves selection of some part of the total available dataset so that patterns in the larger dataset can be observed"},{"term":"data transformation","definition":"The activity of data pre-processing when the format of the given data is changed"},{"term":"data visualisation","definition":"A technique for graphically representing data relationships, trends and patterns"},{"term":"feature extraction","definition":"The activity which involves the derivation of informative and non-redundant features\nfrom the existing features"},{"term":"feature selection","definition":"The activity which involves the selection of those features which are most likely to contribute to model training and prediction"},{"term":"RAM","definition":"A form of electronic computer memory that can be read and changed in any order, typically used to store working data and machine code"},{"term":"test dataset","definition":"A dataset used to test an ML model and evaluate the model developed from a training dataset"},{"term":"accuracy","definition":"The ML functional performance metric used to evaluate a classifier, which measures the proportion of predictions that were correct (After ISO/IEC TR 29119-11)"},{"term":"AUC","definition":"A measure of how well a classifier can distinguish between two classes"},{"term":"confusion matrix","definition":"A technique for summarising the ML functional performance of a classification algorithm"},{"term":"F1-score","definition":"An ML functional performance metric used to evaluate a classifier which provides a balance between recall and precision"},{"term":"FN","definition":"An ML model prediction in which the model mistakenly predicts the negative class"},{"term":"FP","definition":"An ML model prediction in which the model mistakenly predicts the positive class"},{"term":"FPR","definition":"The ratio between the number of negative events wrongly categorised as positive (false positives) and the total number of actual negative events"},{"term":"inter-cluster metric","definition":"A metric that measures the similarity of data points in different clusters"},{"term":"intra-cluster metric","definition":"A metric that measures the similarity of data points within a cluster"},{"term":"ML benchmark suite","definition":"A dataset used to compare ML models and ML algorithms over a range of evaluation metrics"},{"term":"ML functional performance metrics","definition":"A set of measures that relate to the functional correctness of an ML system"},{"term":"MSE","definition":"The statistical measure of the average squared difference between the estimated values and the actual value"},{"term":"precision","definition":"An ML functional performance metric used to evaluate a classifier, which measures the proportion of predicted positives that were correct (After ISO/IEC TR 29119-11)"},{"term":"R-squared","definition":"A statistical measure of how close the data points are to the fitted regression line"},{"term":"recall","definition":"An ML functional performance metric used to evaluate a classifier, which measures the proportion of actual positives that were predicted correctly (After ISO/IEC TR 29119-11)"},{"term":"regression model","definition":"An ML model whose expected output for a given numeric input is a continuous variable (After ISO/IEC DIS 23053)"},{"term":"ROC","definition":"Receiver operating characteristic"},{"term":"ROC curve","definition":"A graphical plot that illustrates the ability of a binary classifier as its discrimination threshold is varied"},{"term":"silhouette coefficient","definition":"A clustering measure between -1 and +1 based on the average inter-cluster and intra-cluster differences"},{"term":"TN","definition":"A prediction in which the model correctly predicts the negative class"},{"term":"TP","definition":"A prediction in which the model correctly predicts the positive class"},{"term":"TPR","definition":"An ML functional performance metric used to evaluate a classifier, which measures the proportion of actual positives that were predicted correctly (After ISO/IEC TR 29119-11)"},{"term":"activation value","definition":"The output of an activation function of a neuron in a neural network"},{"term":"learning rate","definition":"A small positive constant that determines the step size of the updates for the weights and bias in the perceptron model"},{"term":"linear classifier","definition":"A type of classifier that makes its predictions based on a linear combination of input features"},{"term":"MC/DC","definition":"The coverage of all outcomes of the atomic conditions that independently affect the overall decision outcome (ISTQB Glossary)"},{"term":"neural network","definition":"A network of primitive processing elements connected by weighted links with adjustable weights, in which each element produces a value\nby applying a nonlinear function to its input values, and transmits it to other elements or presents it as an output value (ISO/IEC 2382)"},{"term":"neuron","definition":"A node in a neural network, usually receiving multiple input values and generating an activation value"},{"term":"neuron coverage","definition":"The coverage of activated neurons in the neural network for a set of tests"},{"term":"perceptron","definition":"A neural network with just one layer and one neuron"},{"term":"sign-change coverage","definition":"The coverage of neurons activated with both positive and negative activation values in a neural network for a set of tests"},{"term":"sign-sign coverage","definition":"The coverage achieved if by changing the sign of each neuron it can be shown to individually cause one neuron in the next layer to change sign while all other neurons in the next layer do not change sign for a set of tests"},{"term":"synapse","definition":"A connection between two neurons"},{"term":"threshold coverage","definition":"The coverage of neurons exceeding a threshold activation value in a neural network for a set of tests"},{"term":"training data (for neural network)","definition":"A data used to train the neural network"},{"term":"value-change coverage","definition":"The coverage of neurons activated where their activation values differ by more than a change amount in the neural network for a set of tests"},{"term":"weight","definition":"An internal variable of a connection between neurons in a neural network that affects how it computes its outputs and that changes as the neural network is trained"},{"term":"automation bias","definition":"A type of bias caused by a person favoring the recommendations of an automated decision-making system over other sources"},{"term":"big data","definition":"Extensive datasets whose characteristics in terms of volume, variety, velocity and/or variability require specialised technologies and techniques to process"},{"term":"concept drift","definition":"A change in the perceived accuracy of an ML model predictions over time caused by changes in user expectations, behaviour and the operational environment"},{"term":"input data testing","definition":"A test level that focuses on the quality of the data used for training and prediction by ML models"},{"term":"ML functional performance","definition":"The degree to which an ML model meets ML functional performance criteria"},{"term":"test oracle problem","definition":"The challenge of determining whether a test has passed or failed for a given set of test inputs and state"},{"term":"training data","definition":"Data used to train an ML model"},{"term":"autonomous system","definition":"A system capable of working without human intervention for sustained periods"},{"term":"double-blind testing","definition":"A testing when neither the experts nor the evaluators of the outputs should know which ratings were automated"},{"term":"expert system","definition":"An AI-based system for solving problems in a particular domain or application area by drawing inferences from a knowledge base developed from human expertise"},{"term":"ground truth","definition":"The information provided by direct observation and measurement that is known to be real or true"},{"term":"LIME method","definition":"The Local Interpretable Model-Agnostic Explanations program for explaining the predictions from an ML model"},{"term":"model-agnostic method","definition":"A method that may be used for any ML model"},{"term":"ML training data","definition":"A data used to train an ML model"},{"term":"non-deterministic system","definition":"A system which will not always produce the same set of outputs and final state given a particular set of inputs and starting state"},{"term":"probabilistic system","definition":"A system whose behaviour is described in terms of probabilities; hence its outputs cannot be perfectly predicted"},{"term":"SUT","definition":"A type of test object that is a system"},{"term":"test oracle","definition":"A source to determine an expected result to compare with the actual result of the system under test"},{"term":"test suite","definition":"A set of test scripts or test procedures to be executed in a specific test run"},{"term":"A/B testing","definition":"A statistical testing approach to determine which of two components or systems performs better"},{"term":"adversarial attack","definition":"The deliberate use of adversarial examples to cause an ML model to fail"},{"term":"adversarial examples","definition":"Perturbed valid inputs that are passed to the trained model to cause it to provide incorrect predictions during the adversarial attack"},{"term":"adversarial testing","definition":"The test activity when adversarial examples are identified and added to the training data, making sure that the model learns to correctly recognise them"},{"term":"attacker","definition":"A person seeking to exploit potential vulnerabilities of a system"},{"term":"back-to-back testing","definition":"Testing to compare two or more variants of a test item or a simulation model of the same test item by executing the same test cases on all variants and comparing the results"},{"term":"data poisoning","definition":"The deliberate and malicious manipulation of training or input data to an ML model"},{"term":"error guessing","definition":"A test design technique where the experience of the tester is used to anticipate what defects might be present in the component or system under test as a result of errors made, and to design tests specifically to expose them"},{"term":"exhaustive testing","definition":"A test approach in which the test suite comprises all combinations of input values and preconditions"},{"term":"experience-based testing","definition":"Testing based on the tester's experience, knowledge and intuition"},{"term":"exploratory testing","definition":"An informal test design technique where the tester actively controls the design of the tests as those tests are performed and uses information gained while testing to design new and better tests"},{"term":"follow-up test case","definition":"A test case generated by applying a metamorphic relation to a source test case during metamorphic testing"},{"term":"MR","definition":"A description of how a change in the test inputs from the source test case to the follow-up test case affects a change in the expected outputs from the source test case to the follow-up test case"},{"term":"MT","definition":"A test technique in which the inputs and expected results are extrapolated from a passing test case using a metamorphic relation"},{"term":"neural network trojan","definition":"A vulnerability injected into a neural network using a data poisoning attack with the intent of exploiting it later"},{"term":"outlier","definition":"An observation that lies outside the overall pattern of the data distribution"},{"term":"pairwise testing","definition":"A test technique in which test cases are designed to execute all possible discrete combinations of each pair of input parameters"},{"term":"partial oracle","definition":"A test oracle that verifies only some aspects of the test output"},{"term":"pattern","definition":"A regularity in data or a systematic relationship between data points"},{"term":"pseudo-oracle","definition":"An independently derived variant of the test item used to generate results which are compared with the results of the original test item based on the same test"},{"term":"source test case","definition":"A test case that passed and is used as the basis of follow-up test cases in metamorphic testing"},{"term":"tour","definition":"A set of exploratory tests organised around a special focus"},{"term":"trend","definition":"An upwards or downwards shift in a data set over time"},{"term":"virtual test environment","definition":"A test environment in which one or more parts are digitally simulated"},{"term":"multi-agent system","definition":"A system that comprises multiple intelligent agents"},{"term":"test environment","definition":"A space where AI models and systems are evaluated, validated, and tested to ensure they meet the set standards before deployment"},{"term":"digital twin","definition":"An advanced continuous simulation of a physical entity"},{"term":"Bayesian techniques","definition":"A technique that considers before and after probability distributions as parameters of a statistical model"},{"term":"defect prediction","definition":"A technique to predict the areas within the test object in which defects will occur or the quantity of defects that are present"},{"term":"GUI","definition":"A type of interface that allows users to interact with a component or system through graphical icons and visual indicators"},{"term":"visual testing","definition":"Testing that uses image recognition to interact with GUI objects through the same interface as an actual user, and does not require access to the underlying code and interface definitions"},{"term":"weak AI","definition":"AI focused on a single well-defined task to address a specific problem (ISO/IEC TR 29119-11)"}];
const GENAI_GLOSSARY=[{"term":"A/B testing","definition":"A statistical testing approach to determine which of two components or systems performs better"},{"term":"acceptance criteria","definition":"The criteria that a work product must satisfy to be accepted by the stakeholders"},{"term":"AI chatbot","definition":"A conversational agent that uses LLMs to process queries and generate human-like text responses, enabling interactive communication with users"},{"term":"back-to-back testing","definition":"Testing to compare two or more variants of a test item or a simulation model of the same test item by executing the same test cases on all variants and comparing the results"},{"term":"context window","definition":"The span of text, measured in tokens, that a language model considers when generating responses, influencing the relevance and coherence of its outputs"},{"term":"deep learning","definition":"ML using neural networks with multiple layers"},{"term":"embedding","definition":"A technique used to represent tokens as dense vectors in a continuous space, learned during training to capture semantic, syntactic, and contextual relationships"},{"term":"feature","definition":"An individual measurable attribute of the input data used for training by an ML algorithm and for prediction by an ML model"},{"term":"foundation LLM","definition":"General-purpose models pre-trained on a wide range of text data, capable of predicting the next word based on learned linguistic patterns"},{"term":"GenAI","definition":"A type of artificial intelligence system that uses machine learning models to generate (new) intellectual content that resembles human-created content"},{"term":"GPT","definition":"A type of transformer-based deep learning model pre-trained on vast amounts of text data to understand and generate human-like text"},{"term":"ground truth","definition":"The information provided by direct observation and measurement that is known to be real or true"},{"term":"instruction-tuned LLM","definition":"A foundation LLM trained to follow instructions, often reinforced by feedback to encourage correct answers"},{"term":"LLM","definition":"A computer program that uses very large collections of language data in order to understand and produce text in a way that is similar to the way humans do"},{"term":"ML","definition":"The process using computational techniques to enable systems to learn from data or experience (ISO/IEC TR 29119-11)"},{"term":"metamorphic testing","definition":"A test technique in which the inputs and expected results are extrapolated from a passing test case using a metamorphic relation"},{"term":"multimodal model","definition":"GenAI model that is capable of processing and generating content across multiple data types, such as text, images, and audio"},{"term":"prompt","definition":"A natural language input provided to elicit specific response in Generative AI and large language models"},{"term":"reasoning LLM","definition":"An LLM building upon instruction-tuned models by refining their ability to emulate human-like reasoning processes"},{"term":"SLM","definition":"Language model that is intentionally designed and trained to be small, offering a balance between efficiency and task-specific language understanding"},{"term":"symbolic AI","definition":"An AI approach that uses symbols, rules, and structured knowledge to model reasoning"},{"term":"test data","definition":"Data needed for test execution"},{"term":"test case","definition":"A set of preconditions, inputs, actions (where applicable), expected results and postconditions, developed based on test conditions"},{"term":"test oracle","definition":"A source to determine an expected result to compare with the actual result of the system under test"},{"term":"test oracle problem","definition":"The challenge of determining whether a test has passed or failed for a given set of test inputs and state"},{"term":"test script","definition":"A sequence of instructions for the execution of a test"},{"term":"tokenization","definition":"The process of breaking down text into smaller units (tokens) for processing by language models"},{"term":"transformer","definition":"A deep learning model architecture that utilises self-attention mechanisms to capture long-range dependencies in input sequences"},{"term":"vision-language model","definition":"A GenAI system that jointly processes visual and textual data to perform tasks by linking and generating content across both modalities"},{"term":"few-shot prompting","definition":"A technique where a model is given a few examples within the prompt to guide it in generating appropriate responses"},{"term":"meta prompting","definition":"The crafting of higher-level instructions that generate specific prompts for exploring or automating capabilities"},{"term":"NLP","definition":"The processing of data encoded in natural language by computers to retrieve information and for knowledge representation"},{"term":"one-shot prompting","definition":"A prompt writing technique where the prompt contains one example to guide the LLM's response"},{"term":"prompt chaining","definition":"A prompting technique that involves using the output of one prompt as the input for another, creating a sequence of prompts"},{"term":"prompt engineering","definition":"The process of designing and refining input prompts to guide LLMs toward producing desired outputs"},{"term":"system prompt","definition":"A predefined instruction set, typically hidden from the chatbot’s users, that consistently establishes the context, tone, and boundaries for an LLM's responses and guides its behaviour throughout interactions"},{"term":"test condition","definition":"A testable aspect of a component or system identified as a basis for testing"},{"term":"test design","definition":"The activity that derives and specifies test cases from test conditions"},{"term":"test implementation","definition":"The activity that prepares the testware needed for test execution"},{"term":"test report","definition":"Documentation summarising testing and results"},{"term":"user prompt","definition":"An instruction or query entered by a user into a Large Language Model (LLM) that directs the model's response to fulfil specific tasks or provide desired information"},{"term":"zero-shot prompting","definition":"A prompt writing technique where the prompt contains no examples, relying on the model's pre-existing knowledge to generate a response"},{"term":"bias","definition":"The systematic difference in treatment of certain objects, people or groups in comparison to others (ISO/IEC DIS 22989)"},{"term":"data privacy","definition":"The protection of personally identifiable information or otherwise sensitive information from undesired disclosure"},{"term":"hallucination","definition":"Wrong information created by an LLM"},{"term":"reasoning error","definition":"Errors that occur when LLMs misinterpret logical structures, leading to incorrect conclusions"},{"term":"security","definition":"The degree to which a component or system protects its data and resources against unauthorised access or use and secures unobstructed access and use for its legitimate users"},{"term":"temperature","definition":"A parameter that controls the randomness or creativity of LLM's outputs"},{"term":"vulnerability","definition":"A weakness in a component, system, procedures, or controls that could allow for a successful security attack"},{"term":"fine-tuning","definition":"A supervised learning process using a dataset of labelled examples to update LLM weights and adapt them for specific tasks or domains"},{"term":"LLM-powered agent","definition":"An application that integrates LLM reasoning, decision-making, and memory, using tools to perform tasks"},{"term":"LLMOps","definition":"Practices and tools focused on deploying, monitoring, and maintaining LLMs in production environments"},{"term":"overfitting","definition":"The generation of an ML model that corresponds too closely to the training dataset, resulting in a model that finds it difficult to generalise to new data"},{"term":"RAG","definition":"A technique combining LLM capabilities with a retriever to fetch relevant data for generating accurate, contextually relevant responses"},{"term":"test infrastructure","definition":"The test environments, test tools, office environment and procedures needed to perform testing"},{"term":"vector database","definition":"A database optimised for storing and querying high-dimensional vector representations of data"},{"term":"shadow AI","definition":"The use of GenAI tools or systems within an organisation without formal approval or oversight"}];