diff --git a/README.md b/README.md index 13a5d4e..8b9bc14 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ gen_output = model.generate( ``` -For the detailed examples in each framework, please have a look at **example_notebook** directory. +For the detailed examples in each framework, please have a look at the **lpz_examples** directory. ## Available Logits Processors diff --git a/logits_processor_zoo/transformers/generation_length.py b/logits_processor_zoo/transformers/generation_length.py index d9903fb..b68fb94 100644 --- a/logits_processor_zoo/transformers/generation_length.py +++ b/logits_processor_zoo/transformers/generation_length.py @@ -17,11 +17,11 @@ import torch from transformers import PreTrainedTokenizer -from logits_processor_zoo.utils import text_to_token +from logits_processor_zoo.utils import text_to_token, SentenceChecker from logits_processor_zoo.transformers.base import BaseLogitsProcessor -class GenLengthLogitsProcessor(BaseLogitsProcessor): +class GenLengthLogitsProcessor(BaseLogitsProcessor, SentenceChecker): """ A logits processor that adjusts the likelihood of the end-of-sequence (EOS) token based on the length of the generated sequence, encouraging or discouraging shorter answers. 
@@ -39,14 +39,13 @@ class GenLengthLogitsProcessor(BaseLogitsProcessor): """ def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float, p: int = 2, complete_sentences: bool = False, boost_token_str: str = None): - super().__init__() + BaseLogitsProcessor.__init__(self) + SentenceChecker.__init__(self, tokenizer) self.boost_token = tokenizer.eos_token_id if boost_token_str is not None: self.boost_token = text_to_token(tokenizer, boost_token_str, last=False) self.boost_factor = boost_factor self.p = p - self.full_stop_token = text_to_token(tokenizer, "It is a sentence.", last=True) - self.new_line_token = text_to_token(tokenizer, "It is a new line\n", last=True) self.complete_sentences = complete_sentences def _process(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.Tensor: @@ -56,7 +55,7 @@ def _process(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> to enabled = (input_ids[:, -token_count:] == self.boost_token).sum(dim=1) == 0 if self.complete_sentences: - enabled = enabled & ((input_ids[:, -1] == self.full_stop_token) | (input_ids[:, -1] == self.new_line_token)) + enabled = enabled & self._check_sentence_end(input_ids) scores[:, self.boost_token] += enabled * boost_val diff --git a/logits_processor_zoo/transformers/max_time.py b/logits_processor_zoo/transformers/max_time.py index 8f7d6a1..67e0151 100644 --- a/logits_processor_zoo/transformers/max_time.py +++ b/logits_processor_zoo/transformers/max_time.py @@ -19,10 +19,10 @@ import torch from transformers import PreTrainedTokenizer from logits_processor_zoo.transformers.base import BaseLogitsProcessor -from logits_processor_zoo.utils import text_to_token, enforce_tokens +from logits_processor_zoo.utils import text_to_token, enforce_tokens, SentenceChecker -class MaxTimeLogitsProcessor(BaseLogitsProcessor): +class MaxTimeLogitsProcessor(BaseLogitsProcessor, SentenceChecker): """ A logits processor that enforces the end-of-sentence (EOS) token after a specified 
maximum time passes. Useful for controlling generation time and ensuring responses complete within time constraints. @@ -44,13 +44,12 @@ def __init__( complete_sentences: bool = False, boost_token_str: str = None, ): - super().__init__() + BaseLogitsProcessor.__init__(self) + SentenceChecker.__init__(self, tokenizer) self.boost_token = tokenizer.eos_token_id if boost_token_str is not None: self.boost_token = text_to_token(tokenizer, boost_token_str, last=False) self.max_time = max_time - self.full_stop_token = text_to_token(tokenizer, "It is a sentence.", last=True) - self.new_line_token = text_to_token(tokenizer, "It is a new line\n", last=True) self.complete_sentences = complete_sentences def _reset(self): @@ -62,7 +61,7 @@ def _process(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> to enabled = (input_ids[:, -token_count:] == self.boost_token).sum(dim=1) == 0 if self.complete_sentences: - enabled = enabled & ((input_ids[:, -1] == self.full_stop_token) | (input_ids[:, -1] == self.new_line_token)) + enabled = enabled & self._check_sentence_end(input_ids) if elapsed_time > self.max_time: for i in range(scores.shape[0]): diff --git a/logits_processor_zoo/trtllm/generation_length.py b/logits_processor_zoo/trtllm/generation_length.py index 606651b..d569b68 100644 --- a/logits_processor_zoo/trtllm/generation_length.py +++ b/logits_processor_zoo/trtllm/generation_length.py @@ -19,10 +19,10 @@ from transformers import PreTrainedTokenizer import torch from tensorrt_llm.sampling_params import LogitsProcessor -from logits_processor_zoo.utils import text_to_token +from logits_processor_zoo.utils import text_to_token, SentenceChecker -class GenLengthLogitsProcessor(LogitsProcessor): +class GenLengthLogitsProcessor(LogitsProcessor, SentenceChecker): """ A logits processor that adjusts the likelihood of the end-of-sequence (EOS) token based on the length of the generated sequence, encouraging or discouraging shorter answers. 
@@ -37,9 +37,9 @@ class GenLengthLogitsProcessor(LogitsProcessor): or a new line. Default is False. boost_token_str (str, optional): A string to be tokenized and used instead of EOS. Especially useful for . """ - def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float, - p: int = 2, complete_sentences: bool = False, boost_token_str: str = None): - + def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float, p: int = 2, + complete_sentences: bool = False, boost_token_str: str = None): + SentenceChecker.__init__(self, tokenizer) self.tokenizer = tokenizer self.boost_token = self.tokenizer.eos_token_id self.boost_token_str = boost_token_str @@ -47,8 +47,6 @@ def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float, self.boost_token = text_to_token(self.tokenizer, boost_token_str, last=False) self.boost_factor = boost_factor self.p = p - self.full_stop_token = text_to_token(self.tokenizer, "It is a sentence.", last=True) - self.new_line_token = text_to_token(self.tokenizer, "It is a new line\n", last=True) self.complete_sentences = complete_sentences self.token_count = 0 @@ -64,7 +62,7 @@ def __call__(self, req_id: int, logits: torch.Tensor, ids = torch.LongTensor(token_ids).to(logits.device, non_blocking=True) if self.complete_sentences: - enabled = (ids[:, -1] == self.full_stop_token) | (ids[:, -1] == self.new_line_token) + enabled = self._check_sentence_end(ids) logits[:, :, self.boost_token] += enabled * boost_val else: logits[:, :, self.boost_token] += boost_val diff --git a/logits_processor_zoo/trtllm/max_time.py b/logits_processor_zoo/trtllm/max_time.py index 01f2f3e..f00f6c4 100644 --- a/logits_processor_zoo/trtllm/max_time.py +++ b/logits_processor_zoo/trtllm/max_time.py @@ -20,10 +20,10 @@ from transformers import PreTrainedTokenizer import torch from tensorrt_llm.sampling_params import LogitsProcessor -from logits_processor_zoo.utils import text_to_token, enforce_tokens +from logits_processor_zoo.utils import 
text_to_token, enforce_tokens, SentenceChecker -class MaxTimeLogitsProcessor(LogitsProcessor): +class MaxTimeLogitsProcessor(LogitsProcessor, SentenceChecker): """ A logits processor that enforces the end-of-sentence (EOS) token after a specified maximum time passes. Useful for controlling generation time and ensuring responses complete within time constraints. @@ -44,13 +44,12 @@ def __init__( complete_sentences: bool = False, boost_token_str: str = None, ): + SentenceChecker.__init__(self, tokenizer) self.tokenizer = tokenizer self.boost_token = self.tokenizer.eos_token_id self.boost_token_str = boost_token_str if boost_token_str is not None: self.boost_token = text_to_token(self.tokenizer, boost_token_str, last=False) - self.full_stop_token = text_to_token(self.tokenizer, "It is a sentence.", last=True) - self.new_line_token = text_to_token(self.tokenizer, "It is a new line\n", last=True) self.complete_sentences = complete_sentences self.token_count = 0 self.max_time = max_time @@ -75,7 +74,7 @@ def __call__( enabled = True if self.complete_sentences: - enabled = (ids[:, -1] == self.full_stop_token) | (ids[:, -1] == self.new_line_token) + enabled = self._check_sentence_end(ids) if time_exceeded and enabled: # enforce the EOS token diff --git a/logits_processor_zoo/utils.py b/logits_processor_zoo/utils.py index fd284fe..560dc90 100644 --- a/logits_processor_zoo/utils.py +++ b/logits_processor_zoo/utils.py @@ -16,7 +16,7 @@ # from transformers import PreTrainedTokenizer -from typing import List +from typing import List, Union import torch @@ -50,3 +50,15 @@ def enforce_tokens(scores: torch.Tensor, tokens: List[int]): scores.fill_(scores.min()) scores[tokens] = choice_scores return scores + + +class SentenceChecker: + def __init__(self, tokenizer: PreTrainedTokenizer): + self.full_stop_token = text_to_token(tokenizer, "It is a sentence.", last=True) + self.new_line_token = text_to_token(tokenizer, "It is a new line\n", last=True) + + def _check_sentence_end(self, 
input_ids: Union[List[int], torch.Tensor]): + if isinstance(input_ids, list) or isinstance(input_ids, tuple): # vllm input + return (input_ids[-1] == self.full_stop_token) | (input_ids[-1] == self.new_line_token) + else: + return (input_ids[:, -1] == self.full_stop_token) | (input_ids[:, -1] == self.new_line_token) diff --git a/logits_processor_zoo/vllm/generation_length.py b/logits_processor_zoo/vllm/generation_length.py index 3a7bb54..8bac2cc 100644 --- a/logits_processor_zoo/vllm/generation_length.py +++ b/logits_processor_zoo/vllm/generation_length.py @@ -18,10 +18,10 @@ from typing import List, Union import torch from transformers import PreTrainedTokenizer, AutoTokenizer -from logits_processor_zoo.utils import text_to_token +from logits_processor_zoo.utils import text_to_token, SentenceChecker -class GenLengthLogitsProcessor: +class GenLengthLogitsProcessor(SentenceChecker): """ A logits processor that adjusts the likelihood of the end-of-sequence (EOS) token based on the length of the generated sequence, encouraging or discouraging shorter answers. 
@@ -38,10 +38,10 @@ class GenLengthLogitsProcessor: """ def __init__(self, tokenizer: Union[PreTrainedTokenizer, str], boost_factor: float, p: int = 2, complete_sentences: bool = False, boost_token_str: str = None): - self.tokenizer = tokenizer if isinstance(self.tokenizer, str): self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer) + SentenceChecker.__init__(self, self.tokenizer) self.boost_token = self.tokenizer.eos_token_id self.boost_token_str = boost_token_str @@ -49,11 +49,12 @@ def __init__(self, tokenizer: Union[PreTrainedTokenizer, str], boost_factor: flo self.boost_token = text_to_token(self.tokenizer, boost_token_str, last=False) self.boost_factor = boost_factor self.p = p - self.full_stop_token = text_to_token(self.tokenizer, "It is a sentence.", last=True) - self.new_line_token = text_to_token(self.tokenizer, "It is a new line\n", last=True) self.complete_sentences = complete_sentences def __call__(self, prompt_tokens_ids: List[int], past_token_ids: List[int], scores: torch.Tensor) -> torch.Tensor: + if self.boost_token in past_token_ids: # do not boost repeatedly + return scores + gen_length = len(past_token_ids) boost_val = 0 @@ -61,7 +62,7 @@ def __call__(self, prompt_tokens_ids: List[int], past_token_ids: List[int], scor boost_val = self.boost_factor * (gen_length ** self.p) / (10 ** self.p) if self.complete_sentences and gen_length > 0: - enabled = (past_token_ids[-1] == self.full_stop_token) | (past_token_ids[-1] == self.new_line_token) + enabled = self._check_sentence_end(past_token_ids) scores[self.boost_token] += enabled * boost_val else: scores[self.boost_token] += boost_val diff --git a/logits_processor_zoo/vllm/max_time.py b/logits_processor_zoo/vllm/max_time.py index 467795e..4a4204c 100644 --- a/logits_processor_zoo/vllm/max_time.py +++ b/logits_processor_zoo/vllm/max_time.py @@ -19,10 +19,10 @@ from typing import List import torch from transformers import PreTrainedTokenizer, AutoTokenizer -from logits_processor_zoo.utils import 
text_to_token, enforce_tokens +from logits_processor_zoo.utils import text_to_token, enforce_tokens, SentenceChecker -class MaxTimeLogitsProcessor: +class MaxTimeLogitsProcessor(SentenceChecker): """ A logits processor that enforces the end-of-sentence (EOS) token after a specified maximum time passes. Useful for controlling generation time and ensuring responses complete within time constraints. @@ -47,13 +47,12 @@ def __init__( self.tokenizer = tokenizer if isinstance(self.tokenizer, str): self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer) + SentenceChecker.__init__(self, self.tokenizer) self.boost_token = self.tokenizer.eos_token_id self.boost_token_str = boost_token_str if boost_token_str is not None: self.boost_token = text_to_token(self.tokenizer, boost_token_str, last=False) - self.full_stop_token = text_to_token(self.tokenizer, "It is a sentence.", last=True) - self.new_line_token = text_to_token(self.tokenizer, "It is a new line\n", last=True) self.complete_sentences = complete_sentences self.max_time = max_time self._reset() @@ -77,6 +76,8 @@ def __call__( past_token_ids: List[int], scores: torch.Tensor, ) -> torch.Tensor: + if self.boost_token in past_token_ids: # do not force repeatedly + return scores elapsed_time = time.time() - self.start_time time_exceeded = elapsed_time > self.max_time @@ -84,7 +85,7 @@ def __call__( enabled = True if self.complete_sentences and gen_length > 0: - enabled = (past_token_ids[-1] == self.full_stop_token) | (past_token_ids[-1] == self.new_line_token) + enabled = self._check_sentence_end(past_token_ids) if time_exceeded and enabled: scores = enforce_tokens(scores, [self.boost_token]) diff --git a/examples/transformers/cite_prompt_logits_processor.ipynb b/lpz_examples/transformers/cite_prompt_logits_processor.ipynb similarity index 99% rename from examples/transformers/cite_prompt_logits_processor.ipynb rename to lpz_examples/transformers/cite_prompt_logits_processor.ipynb index 13e5e30..a6cc9b0 100644 --- 
a/examples/transformers/cite_prompt_logits_processor.ipynb +++ b/lpz_examples/transformers/cite_prompt_logits_processor.ipynb @@ -33,7 +33,7 @@ } ], "source": [ - "from examples.transformers.utils import LLMRunner\n", + "from lpz_examples.transformers.utils import LLMRunner\n", "from logits_processor_zoo.transformers import CiteFromPromptLogitsProcessor\n", "\n", "\n", diff --git a/examples/transformers/force_last_phrase_logits_processor.ipynb b/lpz_examples/transformers/force_last_phrase_logits_processor.ipynb similarity index 99% rename from examples/transformers/force_last_phrase_logits_processor.ipynb rename to lpz_examples/transformers/force_last_phrase_logits_processor.ipynb index d1e67aa..6875b0b 100644 --- a/examples/transformers/force_last_phrase_logits_processor.ipynb +++ b/lpz_examples/transformers/force_last_phrase_logits_processor.ipynb @@ -37,7 +37,7 @@ } ], "source": [ - "from examples.transformers.utils import LLMRunner\n", + "from lpz_examples.transformers.utils import LLMRunner\n", "from logits_processor_zoo.transformers import ForceLastPhraseLogitsProcessor\n", "\n", "\n", diff --git a/examples/transformers/gen_length_logits_processor.ipynb b/lpz_examples/transformers/gen_length_logits_processor.ipynb similarity index 86% rename from examples/transformers/gen_length_logits_processor.ipynb rename to lpz_examples/transformers/gen_length_logits_processor.ipynb index fac3efe..35faa13 100644 --- a/examples/transformers/gen_length_logits_processor.ipynb +++ b/lpz_examples/transformers/gen_length_logits_processor.ipynb @@ -23,9 +23,17 @@ "execution_count": 2, "id": "0ea01217", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.\n" + ] + } + ], "source": [ - "from examples.transformers.utils import LLMRunner\n", + "from lpz_examples.transformers.utils import LLMRunner\n", "from 
logits_processor_zoo.transformers import GenLengthLogitsProcessor\n", "\n", "example_prompts =[\n", @@ -250,7 +258,7 @@ "\n", "Finally, after many twists and turns, Timmy found himself back at his house. He was exhausted but happy to be safe and sound again. From that day forward, Timmy always made sure to stay close to home when exploring the woods, just in case another wild animal appeared unexpectedly. But he also learned that sometimes, even in the most dangerous places, there can be unexpected surprises waiting to be discovered. And that's how we learn to appreciate life and its many wonders. \n", "\n", - "And so, the end. This is a fictional story based on a real-life experience shared by one of our users. We hope you enjoyed reading it! Let us know if you have any other questions or requests. We're here to help. 🌳✨ #Adventure #Exploration #Wilderness #Safety #Nature #Storytelling #AdventureStories #Traveling #ExploringTheWoods #SurvivalSkills #LearningFromExperiences #Fantasy #FictionalNarratives #RealLifeInspiration #ChildhoodMemories #AdventureInTheForest #WildAnimalEncounters #Resilience #Gratitude #Endings #StartsAgain #NewDay #SafeReturn #HomeIsWhereWeBelong #ExploreMore #DiscoverNewWonders #BeKindToAnimals #StayAlert #AdventureAlways #SafetyFirst #NatureLovers #Travelers #Explorers #AdventureBooks #TravelJournals #TravelDiaries #TravelTips #TravelAdvice #TravelGoals #TravelPlanner #TravelJournalism #TravelPhotography #TravelWriting #TravelBlog #TravelVlogs #TravelTours #TravelHacks #TravelGadgets #TravelApps #TravelPodcasts #TravelVideos #TravelMusic #TravelArt #TravelFashion #TravelFood #TravelDrink #TravelHealth #TravelInsurance #TravelPetCare #TravelEducation #TravelWorkshops #TravelConferences #TravelMeetups #TravelNetworking #TravelCommunity #TravelEvents #TravelOrganizations #TravelSolutions #TravelResources #TravelAdviceForKids #TravelAdviceForParents #TravelAdviceForSeniors #TravelAdviceForStudents #TravelAdviceForBusinesspeople 
#TravelAdviceForTravelers #TravelAdviceForAdventureLovers #TravelAdviceForNatureLovers #TravelAdviceForHistoryLovers #TravelAdviceForScienceLovers #TravelAdviceForCultureLovers #TravelAdviceForSportsLovers #TravelAdviceForMusicLovers #TravelAdviceForFilmLovers #TravelAdviceForBookLovers #TravelAdviceForTVShowsLovers #TravelAdviceForGamesLovers #TravelAdviceForPetsLovers #TravelAdviceForCookingLovers #TravelAdviceForFitnessLovers #TravelAdviceForLanguageLovers #TravelAdviceForTechnologyLovers #TravelAdviceForPhilosophyLovers #TravelAdviceForReligionLovers #TravelAdviceForPoliticsLovers #TravelAdviceForEconomicsLovers #TravelAdviceForSocialSciencesLovers #TravelAdviceForNaturalSciencesLovers #TravelAdviceForHumanitiesLovers #TravelAdviceForArtsLovers #TravelAdviceForCulturalHeritageLovers #TravelAdviceForEnvironmentalConservationLovers #TravelAdviceForBiodiversityLovers #TravelAdviceForClimateChangeLovers #TravelAdviceForSustainabilityLovers #TravelAdviceForRenewableEnergyLovers #TravelAdviceForCleanWaterLovers #TravelAdviceForAirQualityLovers #TravelAdviceForNoiseLevelLovers #TravelAdviceForLightingLovers #TravelAdviceForTemperatureLovers #TravelAdviceForHumidityLovers #TravelAdviceForWindSpeedLovers #TravelAdviceForRainfallL\n", + "And so, the end. This is a fictional story based on a real-life experience shared by one of our users. We hope you enjoyed reading it! Let us know if you have any other questions or requests. We're here to help. 
🌳✨ #Adventure #Exploration #Wilderness #Safety #Nature #Storytelling #AdventureStories #Traveling #ExploringTheWorld #KidsInNature #ForestExploration #Wildlife #SurvivalSkills #AdventureRide #OutdoorActivities #TravelTips #TravelAdventures #TravelJourney #TravelLife #TravelInspiration #TravelGoals #TravelHacks #TravelAdvice #TravelTipsForTravelers #TravelWithKids #TravelWithFamily #TravelWithFriends #TravelWithPets #TravelWithChildren #TravelWithAdults #TravelWithSiblings #TravelWithParents #TravelWithGrandparents #TravelWithAgedPeople #TravelWithSeniors #TravelWithYoungsters #TravelWithTeenagers #TravelWithAdults #TravelWithOlderAdults #TravelWithSeniorCitizens #TravelWithSeniorCohorts #TravelWithSeniorGroups #TravelWithSeniorCenters #TravelWithSeniorLifelongLearningCenters #TravelWithSeniorHealthCenters #TravelWithSeniorFitnessCenters #TravelWithSeniorSportsCenters #TravelWithSeniorAquaticCenters #TravelWithSeniorGymnasiums #TravelWithSeniorYogaCenters #TravelWithSeniorMassageCenters #TravelWithSeniorSpaCenters #TravelWithSeniorTherapyCenters #TravelWithSeniorCareCenters #TravelWithSeniorAssistiveTechnologyCenters #TravelWithSeniorAutismCenters #TravelWithSeniorDementiaCenters #TravelWithSeniorMentalHealthCenters #TravelWithSeniorNeurologicalCenters #TravelWithSeniorPsychologicalCenters #TravelWithSeniorSocialWorkCenters #TravelWithSeniorCounselingCenters #TravelWithSeniorEducationCenters #TravelWithSeniorTrainingCenters #TravelWithSeniorDevelopmentalCenters #TravelWithSeniorOccupationalCenters #TravelWithSeniorPhysicalActivityCenters #TravelWithSeniorNutritionCenters #TravelWithSeniorWellnessCenters #TravelWithSeniorFitnessCenters #TravelWithSeniorAquaticCenters #TravelWithSeniorGymnasiums #TravelWithSeniorYogaCenters #TravelWithSeniorMassageCenters #TravelWithSeniorSpaCenters #TravelWithSeniorTherapyCenters #TravelWithSeniorCareCenters #TravelWithSeniorAssistiveTechnologyCenters #TravelWithSeniorAutismCenters #TravelWithSeniorDementiaCenters 
#TravelWithSeniorMentalHealthCenters #TravelWithSeniorNeurologicalCenters #TravelWithSeniorPsychologicalCenters #TravelWithSeniorSocialWorkCenters #TravelWithSeniorCounselingCenters #TravelWithSeniorEducationCenters #TravelWithSeniorTrainingCenters #TravelWithSeniorDevelopmentalCenters #TravelWithSeniorOccupationalCenters #TravelWithSeniorPhysicalActivityCenters #TravelWithSeniorNutritionCenters #TravelWithSeniorWellnessCenters #TravelWithSeniorFitness\n", "-----END-----\n", "\n" ] diff --git a/examples/transformers/max_time_logits_processor.ipynb b/lpz_examples/transformers/max_time_logits_processor.ipynb similarity index 99% rename from examples/transformers/max_time_logits_processor.ipynb rename to lpz_examples/transformers/max_time_logits_processor.ipynb index 346f46c..66c9555 100644 --- a/examples/transformers/max_time_logits_processor.ipynb +++ b/lpz_examples/transformers/max_time_logits_processor.ipynb @@ -34,7 +34,7 @@ } ], "source": [ - "from examples.transformers.utils import LLMRunner\n", + "from lpz_examples.transformers.utils import LLMRunner\n", "\n", "runner = LLMRunner(\"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B\")\n", "\n" diff --git a/examples/transformers/multiple_choice_logits_processor.ipynb b/lpz_examples/transformers/multiple_choice_logits_processor.ipynb similarity index 99% rename from examples/transformers/multiple_choice_logits_processor.ipynb rename to lpz_examples/transformers/multiple_choice_logits_processor.ipynb index 4bb42cf..62275ea 100644 --- a/examples/transformers/multiple_choice_logits_processor.ipynb +++ b/lpz_examples/transformers/multiple_choice_logits_processor.ipynb @@ -37,7 +37,7 @@ } ], "source": [ - "from examples.transformers.utils import LLMRunner\n", + "from lpz_examples.transformers.utils import LLMRunner\n", "from logits_processor_zoo.transformers import MultipleChoiceLogitsProcessor\n", "\n", "\n", diff --git a/examples/transformers/prevent_hallucination_logits_processor.ipynb 
b/lpz_examples/transformers/prevent_hallucination_logits_processor.ipynb similarity index 99% rename from examples/transformers/prevent_hallucination_logits_processor.ipynb rename to lpz_examples/transformers/prevent_hallucination_logits_processor.ipynb index 6502762..4948706 100644 --- a/examples/transformers/prevent_hallucination_logits_processor.ipynb +++ b/lpz_examples/transformers/prevent_hallucination_logits_processor.ipynb @@ -33,7 +33,7 @@ } ], "source": [ - "from examples.transformers.utils import LLMRunner\n", + "from lpz_examples.transformers.utils import LLMRunner\n", "from logits_processor_zoo.transformers import PreventHallucinationLogitsProcessor\n", "\n", "runner = LLMRunner()" diff --git a/examples/transformers/trigger_phrase_logits_processor.ipynb b/lpz_examples/transformers/trigger_phrase_logits_processor.ipynb similarity index 84% rename from examples/transformers/trigger_phrase_logits_processor.ipynb rename to lpz_examples/transformers/trigger_phrase_logits_processor.ipynb index 1490697..a38f8dc 100644 --- a/examples/transformers/trigger_phrase_logits_processor.ipynb +++ b/lpz_examples/transformers/trigger_phrase_logits_processor.ipynb @@ -10,7 +10,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/data/projects/logproc_ws/logits-processor-zoo\n" + "/home/aerdem/projects/nvidia/logits-processor-zoo\n" ] } ], @@ -28,14 +28,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "/data/envs/logproc/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" + "Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.\n" ] } ], "source": [ - "from examples.transformers.utils import LLMRunner\n", - "from logits_processor_zoo.transformers import TriggerPhraseLogitsProcessor, GenLengthLogitsProcessor\n", + "from lpz_examples.transformers.utils import LLMRunner\n", + "from logits_processor_zoo.transformers import TriggerPhraseLogitsProcessor, GenLengthLogitsProcessor, MaxTimeLogitsProcessor\n", "\n", "\n", "example_prompts = [\n", @@ -380,8 +379,6 @@ } ], "source": [ - "\n", - "\n", "runner.generate_response(example_prompts,\n", " [TriggerPhraseLogitsProcessor(runner.tokenizer, batch_size=1, phrase=\"\\n```python\", \n", " trigger_token_phrase=\"\", \n", @@ -451,11 +448,89 @@ " trigger_count=1, trigger_after=True)],\n", " max_tokens=4096)" ] + }, + { + "cell_type": "markdown", + "id": "428c2abb", + "metadata": {}, + "source": [ + "## Only think for 3 seconds, share the function within 4 seconds" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "10df8d66", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n", + "Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prompt: \n", + "Generate a python function to calculate nth fibonacci number. Make it recursive. Keep thinking short.\n", + "\n", + "\n", + "LLM response:\n", + "Okay, I need to write a Python function to calculate the nth Fibonacci number using recursion. Hmm, let's think about how the Fibonacci sequence works. 
The Fibonacci sequence starts with 0 and 1, and each subsequent number is the sum of the two preceding ones. So, the sequence goes 0, 1, 1, 2, 3, 5, 8, and so on.\n", + "\n", + "Wait, but sometimes people define it starting with 1 and 1. I should clarify that. Oh right, the standard definition starts with F(0) = 0, F(1) = 1, F(2) = 1, F(3) = 2, etc. So, for example, F(5) would be 5.\n", + "\n", + "Now, the problem says to make it recursive. Recursion means the function will call itself with a smaller problem each time.\n", + "\n", + "To generate a recursive function to calculate the nth Fibonacci number, we can leverage the definition of the Fibonacci sequence where each number is the sum of the two preceding ones. Here's how we can structure the function:\n", + "\n", + "**Function Definition:**\n", + "- **Parameters:**...Here is the function:\n", + "```python\n", + "def fibonacci(n):\n", + " # Base cases\n", + " if n == 0:\n", + " return 0\n", + " elif n == 1:\n", + " return 1\n", + " else:\n", + " return fibonacci(n-1) + fibonacci(n-2)\n", + "```\n", + "\n", + "**Explanation:**\n", + "- **Base Cases:** The function first checks if `n` is 0 or 1. If `n` is 0, it returns 0. If `n` is 1, it returns 1. These are the simplest cases and serve as the stopping conditions for the recursion.\n", + "- **Recursive Case:** For any `n` greater than 1, the function calls itself with `n-1` and `n-2` and returns their sum. 
This is because the nth Fibonacci number is the sum of the (n-1)th and (n-2)th numbers.\n", + "\n", + "**Example Usage:**\n", + "To find the 5th Fibonacci number:\n", + "- `fibonacci(5)` calls `fibonacci(4) + fibonacci(3)`\n", + "- `fibonacci(4)` calls `fibonacci(3) + fibonacci(2)`\n", + "- And so on, until it reaches the base cases.\n", + "\n", + "This approach efficiently breaks down the problem into smaller subproblems, each of which is solved recursively until the base cases are reached.\n", + "-----END-----\n", + "\n" + ] + } + ], + "source": [ + "runner.generate_response(example_prompts,\n", + " [MaxTimeLogitsProcessor(runner.tokenizer, max_time=3, complete_sentences=True, \n", + " boost_token_str=\"\"),\n", + " TriggerPhraseLogitsProcessor(runner.tokenizer, batch_size=1, \n", + " phrase=\"...Here is the function:\\n```python\",\n", + " trigger_time=4)],\n", + " max_tokens=4096)" + ] } ], "metadata": { "kernelspec": { - "display_name": "logproc", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -469,7 +544,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.3" + "version": "3.10.17" } }, "nbformat": 4, diff --git a/examples/transformers/utils.py b/lpz_examples/transformers/utils.py similarity index 100% rename from examples/transformers/utils.py rename to lpz_examples/transformers/utils.py diff --git a/examples/trtllm/README.md b/lpz_examples/trtllm/README.md similarity index 67% rename from examples/trtllm/README.md rename to lpz_examples/trtllm/README.md index 530329f..275b749 100644 --- a/examples/trtllm/README.md +++ b/lpz_examples/trtllm/README.md @@ -5,18 +5,18 @@ It's recommended to use [TensorRT-LLM release containers](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags) (>= 0.20.0) that has TensorRT-LLM pre-installed. 
Alternatively, please follow [this documentation](https://nvidia.github.io/TensorRT-LLM/installation/linux.html) to install it in [NGC PyTorch containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags) (>=25.04). -## Examples +## Examples ### GenLengthLogitsProcessor A logits processor that adjusts the likelihood of the end-of-sequence (EOS) token based on the length of the generated sequence, encouraging or discouraging shorter answers. ``` -python examples/trtllm/gen_length_logits_processor.py +python lpz_examples/trtllm/gen_length_logits_processor.py ``` ### CiteFromPromptLogitsProcessor A logits processor which boosts or diminishes the likelihood of tokens present in the prompt (and optionally EOS token) to encourage the model to generate tokens similar to those seen in the prompt or vice versa. ``` -python examples/trtllm/cite_prompt_logits_processor.py -p "Retrieved information: +python lpz_examples/trtllm/cite_prompt_logits_processor.py -p "Retrieved information: Pokémon is a Japanese media franchise consisting of video games, animated series and films, a trading card game, and other related media. The franchise takes place in a shared universe in which humans co-exist with creatures known as Pokémon, a large variety of species endowed with special powers. The franchise's target audience is children aged 5 to 12, but it is known to attract people of all ages. @@ -27,13 +27,13 @@ python examples/trtllm/cite_prompt_logits_processor.py -p "Retrieved information ### ForceLastPhraseLogitsProcessor A logits processor which forces LLMs to use the given phrase before they finalize their answers. Most common use cases can be providing references, thanking user with context etc. ``` -python examples/trtllm/last_phrase_logits_processor.py +python lpz_examples/trtllm/last_phrase_logits_processor.py ``` ### MultipleChoiceLogitsProcessor A logits processor to answer multiple choice questions with one of the choices. 
``` -python examples/trtllm/multiple_choice_logits_processor.py -p "I am getting a lot of calls during the day. What is more important for me to consider when I buy a new phone? +python lpz_examples/trtllm/multiple_choice_logits_processor.py -p "I am getting a lot of calls during the day. What is more important for me to consider when I buy a new phone? 0. Camera 1. Screen resolution 2. Operating System @@ -43,11 +43,17 @@ python examples/trtllm/multiple_choice_logits_processor.py -p "I am getting a lo ### TriggerPhraseLogitsProcessor A logits processor which triggers phrases when it encounters a given token. ``` -python examples/trtllm/trigger_phrase_logits_processor.py -p "Generate a python function to calculate nth fibonacci number. Make it recursive. Keep thinking short." +python lpz_examples/trtllm/trigger_phrase_logits_processor.py -p "Generate a python function to calculate nth fibonacci number. Make it recursive. Keep thinking short." ``` ### PreventHallucinationLogitsProcessor A logits processor that mitigates hallucinated model outputs by enforcing a predefined fallback phrase when token confidence falls below a specified threshold. ``` -python examples/trtllm/prevent_hallucination_logits_processor.py -p "What are Nobel Prizes? Name the winners in 1977" +python lpz_examples/trtllm/prevent_hallucination_logits_processor.py -p "What are Nobel Prizes? Name the winners in 1977" +``` + +### MaxTimeLogitsProcessor +A logits processor that enforces the end-of-sequence (EOS) token after a specified maximum time passes, optionally waiting for a new line or a full stop. Useful for controlling generation time and ensuring responses complete within time constraints.
+``` +python lpz_examples/trtllm/max_time_logits_processor.py ``` diff --git a/examples/trtllm/cite_prompt_logits_processor.py b/lpz_examples/trtllm/cite_prompt_logits_processor.py similarity index 100% rename from examples/trtllm/cite_prompt_logits_processor.py rename to lpz_examples/trtllm/cite_prompt_logits_processor.py diff --git a/examples/trtllm/gen_length_logits_processor.py b/lpz_examples/trtllm/gen_length_logits_processor.py similarity index 100% rename from examples/trtllm/gen_length_logits_processor.py rename to lpz_examples/trtllm/gen_length_logits_processor.py diff --git a/examples/trtllm/last_phrase_logits_processor.py b/lpz_examples/trtllm/last_phrase_logits_processor.py similarity index 100% rename from examples/trtllm/last_phrase_logits_processor.py rename to lpz_examples/trtllm/last_phrase_logits_processor.py diff --git a/examples/trtllm/max_time_logits_processor.py b/lpz_examples/trtllm/max_time_logits_processor.py similarity index 100% rename from examples/trtllm/max_time_logits_processor.py rename to lpz_examples/trtllm/max_time_logits_processor.py diff --git a/examples/trtllm/multiple_choice_logits_processor.py b/lpz_examples/trtllm/multiple_choice_logits_processor.py similarity index 100% rename from examples/trtllm/multiple_choice_logits_processor.py rename to lpz_examples/trtllm/multiple_choice_logits_processor.py diff --git a/examples/trtllm/prevent_hallucination_logits_processor.py b/lpz_examples/trtllm/prevent_hallucination_logits_processor.py similarity index 100% rename from examples/trtllm/prevent_hallucination_logits_processor.py rename to lpz_examples/trtllm/prevent_hallucination_logits_processor.py diff --git a/examples/trtllm/trigger_phrase_logits_processor.py b/lpz_examples/trtllm/trigger_phrase_logits_processor.py similarity index 100% rename from examples/trtllm/trigger_phrase_logits_processor.py rename to lpz_examples/trtllm/trigger_phrase_logits_processor.py diff --git a/examples/trtllm/utils.py b/lpz_examples/trtllm/utils.py 
similarity index 100% rename from examples/trtllm/utils.py rename to lpz_examples/trtllm/utils.py diff --git a/examples/vllm/cite_prompt_logits_processor.ipynb b/lpz_examples/vllm/cite_prompt_logits_processor.ipynb similarity index 99% rename from examples/vllm/cite_prompt_logits_processor.ipynb rename to lpz_examples/vllm/cite_prompt_logits_processor.ipynb index 5c4ab63..732c21c 100644 --- a/examples/vllm/cite_prompt_logits_processor.ipynb +++ b/lpz_examples/vllm/cite_prompt_logits_processor.ipynb @@ -70,7 +70,7 @@ } ], "source": [ - "from examples.vllm.utils import vLLMRunner\n", + "from lpz_examples.vllm.utils import vLLMRunner\n", "from logits_processor_zoo.vllm import CiteFromPromptLogitsProcessor\n", "\n", "\n", diff --git a/examples/vllm/force_last_phrase_logits_processor.ipynb b/lpz_examples/vllm/force_last_phrase_logits_processor.ipynb similarity index 99% rename from examples/vllm/force_last_phrase_logits_processor.ipynb rename to lpz_examples/vllm/force_last_phrase_logits_processor.ipynb index 2d063a6..9b56d9b 100644 --- a/examples/vllm/force_last_phrase_logits_processor.ipynb +++ b/lpz_examples/vllm/force_last_phrase_logits_processor.ipynb @@ -70,7 +70,7 @@ } ], "source": [ - "from examples.vllm.utils import vLLMRunner\n", + "from lpz_examples.vllm.utils import vLLMRunner\n", "from logits_processor_zoo.vllm import ForceLastPhraseLogitsProcessor\n", "\n", "\n", diff --git a/examples/vllm/gen_length_logits_processor.ipynb b/lpz_examples/vllm/gen_length_logits_processor.ipynb similarity index 99% rename from examples/vllm/gen_length_logits_processor.ipynb rename to lpz_examples/vllm/gen_length_logits_processor.ipynb index 9b836b3..7ab9c76 100644 --- a/examples/vllm/gen_length_logits_processor.ipynb +++ b/lpz_examples/vllm/gen_length_logits_processor.ipynb @@ -87,7 +87,7 @@ } ], "source": [ - "from examples.vllm.utils import vLLMRunner\n", + "from lpz_examples.vllm.utils import vLLMRunner\n", "from logits_processor_zoo.vllm import 
GenLengthLogitsProcessor\n", "\n", "example_prompts =[\n", diff --git a/examples/vllm/max_time_logits_processor.ipynb b/lpz_examples/vllm/max_time_logits_processor.ipynb similarity index 99% rename from examples/vllm/max_time_logits_processor.ipynb rename to lpz_examples/vllm/max_time_logits_processor.ipynb index 568e7f7..471223d 100644 --- a/examples/vllm/max_time_logits_processor.ipynb +++ b/lpz_examples/vllm/max_time_logits_processor.ipynb @@ -99,7 +99,7 @@ } ], "source": [ - "from examples.vllm.utils import vLLMRunner\n", + "from lpz_examples.vllm.utils import vLLMRunner\n", "from logits_processor_zoo.vllm import MaxTimeLogitsProcessor\n", "\n", "\n", diff --git a/examples/vllm/multiple_choice_logits_processor.ipynb b/lpz_examples/vllm/multiple_choice_logits_processor.ipynb similarity index 99% rename from examples/vllm/multiple_choice_logits_processor.ipynb rename to lpz_examples/vllm/multiple_choice_logits_processor.ipynb index cd6f85f..27ad873 100644 --- a/examples/vllm/multiple_choice_logits_processor.ipynb +++ b/lpz_examples/vllm/multiple_choice_logits_processor.ipynb @@ -87,7 +87,7 @@ } ], "source": [ - "from examples.vllm.utils import vLLMRunner\n", + "from lpz_examples.vllm.utils import vLLMRunner\n", "from logits_processor_zoo.vllm import MultipleChoiceLogitsProcessor\n", "\n", "\n", diff --git a/examples/vllm/performance_profiling.ipynb b/lpz_examples/vllm/performance_profiling.ipynb similarity index 99% rename from examples/vllm/performance_profiling.ipynb rename to lpz_examples/vllm/performance_profiling.ipynb index d032077..fdd64d4 100644 --- a/examples/vllm/performance_profiling.ipynb +++ b/lpz_examples/vllm/performance_profiling.ipynb @@ -73,7 +73,7 @@ } ], "source": [ - "from examples.vllm.utils import vLLMRunner\n", + "from lpz_examples.vllm.utils import vLLMRunner\n", "from logits_processor_zoo.vllm import MultipleChoiceLogitsProcessor\n", "\n", "\n", diff --git a/examples/vllm/prevent_hallucination_logits_processor.ipynb 
b/lpz_examples/vllm/prevent_hallucination_logits_processor.ipynb similarity index 99% rename from examples/vllm/prevent_hallucination_logits_processor.ipynb rename to lpz_examples/vllm/prevent_hallucination_logits_processor.ipynb index 6405b6e..88642e8 100644 --- a/examples/vllm/prevent_hallucination_logits_processor.ipynb +++ b/lpz_examples/vllm/prevent_hallucination_logits_processor.ipynb @@ -73,7 +73,7 @@ } ], "source": [ - "from examples.vllm.utils import vLLMRunner\n", + "from lpz_examples.vllm.utils import vLLMRunner\n", "from logits_processor_zoo.vllm import PreventHallucinationLogitsProcessor\n", "\n", "runner = vLLMRunner()" diff --git a/examples/vllm/trigger_phrase_logits_processor.ipynb b/lpz_examples/vllm/trigger_phrase_logits_processor.ipynb similarity index 65% rename from examples/vllm/trigger_phrase_logits_processor.ipynb rename to lpz_examples/vllm/trigger_phrase_logits_processor.ipynb index e438883..f5f02ba 100644 --- a/examples/vllm/trigger_phrase_logits_processor.ipynb +++ b/lpz_examples/vllm/trigger_phrase_logits_processor.ipynb @@ -10,7 +10,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/data/projects/logproc_ws/logits-processor-zoo\n" + "/home/aerdem/projects/nvidia/logits-processor-zoo\n" ] } ], @@ -24,83 +24,59 @@ "id": "b89279fe", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/data/envs/logproc/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "INFO 07-08 11:01:41 [__init__.py:244] Automatically detected platform cuda.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2025-07-08 11:01:49,020\tINFO util.py:154 -- Missing packages: ['ipywidgets']. 
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "INFO 07-08 11:02:04 [config.py:823] This model supports multiple tasks: {'generate', 'score', 'classify', 'embed', 'reward'}. Defaulting to 'generate'.\n", - "WARNING 07-08 11:02:04 [config.py:3271] Casting torch.bfloat16 to torch.float16.\n", - "WARNING 07-08 11:02:04 [cuda.py:91] To see benefits of async output processing, enable CUDA graph. Since, enforce-eager is enabled, async output processor cannot be used\n", - "INFO 07-08 11:02:04 [llm_engine.py:230] Initializing a V0 LLM engine (v0.9.1) with config: model='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', speculative_config=None, tokenizer='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=16384, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=None, served_model_name=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=None, chunked_prefill_enabled=False, use_async_output_proc=False, pooler_config=None, 
compilation_config={\"level\":0,\"debug_dump_path\":\"\",\"cache_dir\":\"\",\"backend\":\"\",\"custom_ops\":[],\"splitting_ops\":[],\"use_inductor\":true,\"compile_sizes\":[],\"inductor_compile_config\":{\"enable_auto_functionalized_v2\":false},\"inductor_passes\":{},\"use_cudagraph\":true,\"cudagraph_num_of_warmups\":0,\"cudagraph_capture_sizes\":[],\"cudagraph_copy_inputs\":false,\"full_cuda_graph\":false,\"max_capture_size\":0,\"local_cache_dir\":null}, use_cached_outputs=False, \n", - "INFO 07-08 11:02:06 [cuda.py:327] Using Flash Attention backend.\n", - "INFO 07-08 11:02:07 [parallel_state.py:1065] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0\n", - "INFO 07-08 11:02:07 [model_runner.py:1171] Starting to load model deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B...\n", - "INFO 07-08 11:02:07 [weight_utils.py:292] Using model weights format ['*.safetensors']\n", - "INFO 07-08 11:02:07 [weight_utils.py:345] No model.safetensors.index.json found in remote.\n" + "INFO 07-08 16:22:43 [__init__.py:243] Automatically detected platform cuda.\n", + "INFO 07-08 16:22:44 [__init__.py:31] Available plugins for group vllm.general_plugins:\n", + "INFO 07-08 16:22:44 [__init__.py:33] - lora_filesystem_resolver -> vllm.plugins.lora_resolvers.filesystem_resolver:register_filesystem_resolver\n", + "INFO 07-08 16:22:44 [__init__.py:36] All plugins in this group will be loaded. Set `VLLM_PLUGINS` to control which plugins to load.\n", + "WARNING 07-08 16:22:46 [config.py:3135] Casting torch.bfloat16 to torch.float16.\n", + "INFO 07-08 16:22:52 [config.py:793] This model supports multiple tasks: {'embed', 'reward', 'score', 'classify', 'generate'}. Defaulting to 'generate'.\n", + "WARNING 07-08 16:22:52 [arg_utils.py:1420] Chunked prefill is enabled by default for models with max_model_len > 32K. Chunked prefill might not work with some features or models. 
If you encounter any issues, please disable by launching with --enable-chunked-prefill=False.\n", + "INFO 07-08 16:22:52 [config.py:2118] Chunked prefill is enabled with max_num_batched_tokens=2048.\n", + "WARNING 07-08 16:22:52 [cuda.py:87] To see benefits of async output processing, enable CUDA graph. Since, enforce-eager is enabled, async output processor cannot be used\n", + "INFO 07-08 16:22:52 [llm_engine.py:230] Initializing a V0 LLM engine (v0.9.0) with config: model='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', speculative_config=None, tokenizer='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=None, served_model_name=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=None, chunked_prefill_enabled=True, use_async_output_proc=False, pooler_config=None, compilation_config={\"compile_sizes\": [], \"inductor_compile_config\": {\"enable_auto_functionalized_v2\": false}, \"cudagraph_capture_sizes\": [], \"max_capture_size\": 0}, use_cached_outputs=False, \n", + "INFO 07-08 16:22:54 [cuda.py:292] Using Flash Attention backend.\n", + "INFO 07-08 16:22:54 [parallel_state.py:1064] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0\n", + "INFO 07-08 16:22:54 [model_runner.py:1170] Starting to load model 
deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B...\n", + "INFO 07-08 16:22:55 [weight_utils.py:291] Using model weights format ['*.safetensors']\n", + "INFO 07-08 16:22:55 [weight_utils.py:344] No model.safetensors.index.json found in remote.\n" ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading safetensors checkpoint shards: 0% Completed | 0/1 [00:00\n", @@ -251,23 +227,21 @@ "\n", "Let me test this function with some examples. For n=0, it returns 0. For n=1, returns 1. For n=2, it's F(1)+F(0) = 1+0=1. For n=3, F(2)+F(1)=1+1=2. That looks correct.\n", "\n", - "Wait, but sometimes people define the Fibonacci sequence starting with F(1)=1, F(2)=1, F(3)=2, etc. So, if the function is called with n=5, it should return 5. Let me see: F(5) is 5, which is correct.\n", + "Wait, but sometimes people define the Fibonacci sequence starting with F(1)=1, F(2)=1, F(3)=2, etc. So, if the function is called with n=5, it should return 5. Let me see: F(5) is 5, which matches the standard definition. So, the function should work regardless of the starting point as long as the base cases are correct.\n", "\n", - "Another test case: n=5. Let's compute it step by step. F(0)=0, F(1)=1, F(2)=1, F(3)=2, F(4)=3, F(5)=5. So the function should return 5 for n=5.\n", + "Another thing to consider is the base cases. If the function is called with n=0, it returns 0, which is correct. For n=1, returns 1. For n=2, returns 1, which is correct. So, the function should handle all non-negative integers correctly.\n", "\n", "I think this should work. So, the function is straightforward. It's a simple recursive implementation, but it's not the most efficient for large n. However, for the purpose of this problem, it's acceptable.\n", "\n", - "Wait, but in the problem statement, it says to make it recursive. So, the function as written is recursive, but perhaps it's better to memoize it for better performance. But that's beyond the scope here. 
So, I'll proceed with the basic recursive approach.\n", - "\n", - "Wait, but in the function I wrote, for n=0, it returns 0, which is correct. For n=1, returns 1. For n=2, returns 1, which is correct. So, the function seems to handle all cases correctly.\n", + "Wait, but in the problem statement, it says to make it recursive. So, the function should call itself, which it does. So, the function is recursive as required.\n", "\n", - "I think that's a solid approach. So, the function is as written above.\n", + "Wait, but in the function I wrote, for n=0, it returns 0, which is correct. For n=1, returns 1. For n=2, returns 1, which is correct. For n=3, returns 2, etc. So, the function is correct.\n", "\n", "\n", "To solve this problem, we need to generate the nth Fibonacci number using a recursive approach. The Fibonacci sequence is a series of numbers where each number is the sum of the two preceding ones, starting from 0 and 1. \n", "\n", "### Approach\n", - "The approach to solve this problem involves using recursion, which is a method where the function calls itself with a smaller input until it reaches a base case. Here's a step-by-step breakdown of the approach:\n", + "The approach to solve this problem involves using recursion, which is a method where a function calls itself to solve smaller instances of the same problem. Here's a step-by-step breakdown of the approach:\n", "\n", "1. **Base Cases**: \n", " - If `n` is 0, return 0.\n", @@ -287,14 +261,14 @@ " elif n == 1:\n", " return 1\n", " else:\n", - " return fibonacci(n - 1) + fibonacci(n - 2)\n", + " return fibonacci(n-1) + fibonacci(n-2)\n", "```\n", "\n", "### Explanation\n", "- **Base Cases**: The function first checks if `n` is 0 or 1. If `n` is 0, it returns 0. If `n` is 1, it returns 1. These are the simplest cases of the Fibonacci sequence.\n", "- **Recursive Case**: For any `n` greater than 1, the function calls itself with `n-1` and `n-2` and returns the sum of these two recursive calls. 
This builds up the solution by solving smaller subproblems and combining their results.\n", "\n", - "This approach is straightforward and easy to understand, but it's important to note that for large values of `n`, this method can be inefficient due to repeated calculations. However, for the purpose of this problem, the recursive approach is sufficient.\n", + "This approach is straightforward and leverages the divide-and-conquer strategy inherent in recursion, making it easy to understand and implement. However, it's important to note that this approach has a time complexity of O(2^n) due to the exponential number of function calls, which is not efficient for large values of `n`. For larger values, an iterative approach or memoization would be more efficient.\n", "-----END-----\n", "\n" ] @@ -354,9 +328,9 @@ "\n", "Let me test this function with some examples. For n=0, it returns 0. For n=1, returns 1. For n=2, it's F(1)+F(0) = 1+0=1. For n=3, F(2)+F(1)=1+1=2. That looks correct.\n", "\n", - "Wait, but sometimes people define the Fibonacci sequence starting with F(1)=1, F(2)=1, F(3)=2, etc. So, if the function is called with n=5, it should return 5. Let me see: F(5) is 5, which is correct.\n", + "Wait, but sometimes people define the Fibonacci sequence starting with F(1)=1, F(2)=1, F(3)=2, etc. So, if the function is called with n=5, it should return 5. Let me see: F(5) is 5, which matches the standard definition. So, the function should work regardless of the starting point as long as the base cases are correct.\n", "\n", - "Another test case: n=5. Let's compute it step by step. F(0)=0, F(1)=1, F(2)=1, F(3)=2, F(4)=3, F(5)=5. So the function should return 5 for n=5.\n", + "Another thing to consider is the base cases. If the function is called with n=0, it returns 0, which is correct. For n=1, returns 1. For n=2, returns 1, which is correct. So, the function should handle all non-negative integers correctly.\n", "\n", "I think this should work. 
So, the function is straightforward. It's a simple recursive implementation, but it's not the most efficient for large n. However, for the purpose of this problem, it's acceptable.\n", "\n", @@ -438,11 +412,105 @@ " ],\n", " max_tokens=4096)" ] + }, + { + "cell_type": "markdown", + "id": "8b560028", + "metadata": {}, + "source": [ + "## Only think for 3 seconds, share the function within 4 seconds" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "03ae5612", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prompt: \n", + "Generate a python function to calculate nth fibonacci number. Make it recursive. Keep thinking short.\n", + "\n", + "Okay, I need to write a Python function to calculate the nth Fibonacci number using recursion. Hmm, let's think about how the Fibonacci sequence works. The Fibonacci sequence starts with 0 and 1, and each subsequent number is the sum of the two preceding ones. So, the sequence goes 0, 1, 1, 2, 3, 5, 8, and so on.\n", + "\n", + "Wait, but sometimes people define it starting with 1 and 1. I should clarify that. Oh right, the standard definition starts with F(0) = 0, F(1) = 1, F(2) = 1, F(3) = 2, etc. So, for example, F(5) would be 5.\n", + "\n", + "Now, the problem says to make it recursive. Recursion means the function will call itself with a smaller problem each time. So, the base cases are when n is 0 or 1, and then for other values, it adds the two previous numbers.\n", + "\n", + "Wait, but in the standard definition, F(0) is 0, F(1) is 1, F(2) is 1, F(3) is 2, etc. So, for n=0, return 0; for n=1, return 1. 
For n >=2, return F(n-1) + F(n-2).\n", + "\n", + "So, the recursive function can be written as:\n", + "\n", + "def fibonacci(n):\n", + " if n == 0:\n", + " return 0\n", + " elif n == 1:\n", + " return 1\n", + " else:\n", + " return fibonacci(n-1) + fibonacci(n-2)\n", + "\n", + "But wait, recursion can be inefficient for large n because it recalculates the same values many times.\n", + "\n", + "To calculate the nth Fibonacci number using recursion, we can define a function that calls itself with smaller values of n until it reaches the base cases.\n", + "\n", + "**Step-by-Step Explanation:**\n", + "\n", + "1. **Base Cases:**\n", + " - If `n` is 0, return 0.\n", + " - If `n` is 1, return 1.\n", + "\n", + "2. **Recursive...Here is the function:\n", + "```python\n", + "def fibonacci(n):\n", + " if n == 0:\n", + " return 0\n", + " elif n == 1:\n", + " return 1\n", + " else:\n", + " return fibonacci(n-1) + fibonacci(n-2)\n", + "```\n", + "\n", + "**Answer:**\n", + "\n", + "```python\n", + "def fibonacci(n):\n", + " if n == 0:\n", + " return 0\n", + " elif n == 1:\n", + " return 1\n", + " else:\n", + " return fibonacci(n-1) + fibonacci(n-2)\n", + "```\n", + "-----END-----\n", + "\n" + ] + } + ], + "source": [ + "runner.generate_response(example_prompts,\n", + " [MaxTimeLogitsProcessor(runner.tokenizer, max_time=3, complete_sentences=True, \n", + " boost_token_str=\"\"),\n", + " TriggerPhraseLogitsProcessor(runner.tokenizer,\n", + " phrase=\"...Here is the function:\\n```python\",\n", + " trigger_time=4)],\n", + " max_tokens=4096)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd81d03c", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "logproc", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -456,7 +524,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.3" + "version": "3.10.17" } }, "nbformat": 4, 
diff --git a/examples/vllm/utils.py b/lpz_examples/vllm/utils.py similarity index 100% rename from examples/vllm/utils.py rename to lpz_examples/vllm/utils.py diff --git a/examples/vllm/vllm_serve.ipynb b/lpz_examples/vllm/vllm_serve.ipynb similarity index 100% rename from examples/vllm/vllm_serve.ipynb rename to lpz_examples/vllm/vllm_serve.ipynb