Skip to content

Commit fcf20bd

Browse files
feat: add gen ai summaries (#198)
* feat: add gen ai summaries * chore: add xss to llm output * chore: bump version --------- Co-authored-by: Tom Schönmann <[email protected]>
1 parent 09b91da commit fcf20bd

File tree

11 files changed

+978
-183
lines changed

11 files changed

+978
-183
lines changed

web/.env.example

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
VITE_RELEASE_CHANNEL = "development"
2+
3+
VITE_API_KEY =
4+
5+
VITE_PLAUSIBLE_SITE_ID =
6+
VITE_PLAUSIBLE_API_KEY =
7+
8+
VITE_AP_PKGS_URL =
9+
VITE_SELECT_PKG_URL =
10+
VITE_OVERVIEW_PKGS_URL =
11+
VITE_SITEMAP_PKGS_URL =
12+
13+
GOOGLE_GENERATIVE_AI_API_KEY =

web/app/ai/packages.ts

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import { createGoogleGenerativeAI } from "@ai-sdk/google";
2+
import TTLCache from "@isaacs/ttlcache";
3+
import { generateText } from "ai";
4+
import { hoursToMilliseconds } from "date-fns";
5+
import xss from "xss";
6+
7+
type CacheKey = "trending-packages-summary" | "top-downloads-summary";
8+
9+
/**
 * Generates short, LLM-written HTML summaries about groups of CRAN packages
 * and memoizes them in a process-wide TTL cache (6 hours per entry).
 *
 * Every value stored in the cache, and therefore every value returned from
 * this class, has already been passed through `xss()`, so cache hits and
 * cache misses are equally safe to hand to `dangerouslySetInnerHTML`.
 */
export class AIPackageService {
  private static cache = new TTLCache<CacheKey, string>({
    ttl: hoursToMilliseconds(6),
    max: 100,
  });

  /**
   * Summarizes the topics of the currently most downloaded packages.
   *
   * @param context Plain-text listing of the packages (name, title,
   *   description) that is appended to the prompt.
   * @returns Sanitized HTML produced by the model, or the cached copy of it.
   */
  static async generateTopDownloadsSummary(context: string): Promise<string> {
    const instructions = [
      "You're a world-class summarizer of scientific code packages written in R.",
      "Given the currently most downloaded packages on CRAN and their descriptions below, summarize a concise analysis of the trending TOPICS of those packages.",
      "The goal is to get a birds-eye-view of the current trends. Respond with highly concise, to-the-point, well-written prose.",
      "You MUST respond in HTML-format.",
      "Never use headings or lists.",
    ].join(" ");

    return AIPackageService.summarize(
      "top-downloads-summary",
      instructions,
      context,
      0.3,
    );
  }

  /**
   * Summarizes the topics of the currently trending packages.
   *
   * @param context Plain-text listing of the packages (name, title,
   *   description) that is appended to the prompt.
   * @returns Sanitized HTML produced by the model, or the cached copy of it.
   */
  static async generateTrendsSummary(context: string): Promise<string> {
    const instructions = [
      "You're a world-class summarizer of scientific code packages written in R.",
      "Given the currently trending packages and their descriptions below, summarize a highly concise analysis of the trending TOPICS of those packages.",
      "The goal is to get a birds-eye-view of the current trends. Respond with concise, to-the-point, well-written prose.",
      "You MUST respond in HTML-format. Never use headings or lists.",
    ].join(" ");

    return AIPackageService.summarize(
      "trending-packages-summary",
      instructions,
      context,
    );
  }

  /**
   * Shared cache-then-generate routine behind both public summaries.
   *
   * The same `cacheKey` is used for the lookup and for the store, so one
   * summary can never be served in place of another.
   *
   * NOTE(review): the key does not depend on `context`, so callers passing
   * different contexts within the TTL receive the first generated summary.
   *
   * @param cacheKey Cache slot for this kind of summary.
   * @param instructions System-style instructions placed before the context.
   * @param context Package listing appended after a `---` separator.
   * @param temperature Optional sampling temperature; omitted from the
   *   request when not provided so the SDK default applies.
   */
  private static async summarize(
    cacheKey: CacheKey,
    instructions: string,
    context: string,
    temperature?: number,
  ): Promise<string> {
    const cached = AIPackageService.cache.get(cacheKey);
    if (cached) {
      return cached;
    }

    const google = createGoogleGenerativeAI();

    const { text } = await generateText({
      model: google("gemini-1.5-flash"),
      prompt: `${instructions}\n---\n${context}`,
      // Just a safeguard to prevent excessive token usage.
      maxTokens: 8192,
      ...(temperature === undefined ? {} : { temperature }),
    });

    // Sanitize BEFORE caching: model output is untrusted HTML, and a cache
    // hit must never return markup that skipped the XSS filter.
    const safeHtml = xss(text);
    AIPackageService.cache.set(cacheKey, safeHtml);

    return safeHtml;
  }
}

web/app/data/env.ts

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,8 @@ export const envSchema = z.object({
66
VITE_API_KEY: z.string().describe("GitHub Personal Access Token"),
77
VITE_PLAUSIBLE_SITE_ID: z.string().describe("Plausible Analytics Site ID"),
88
VITE_PLAUSIBLE_API_KEY: z.string().describe("Plausible Analytics API Key"),
9-
VITE_STATS_GH_TRENDS_BASE_URL: z
10-
.string()
11-
.url()
12-
.describe("GitHub Trends Base URL"),
13-
VITE_BASE_OG_POSTER_API_URL: z
14-
.string()
15-
.url()
16-
.describe("Open Graph Poster API URL"),
179
VITE_AP_PKGS_URL: z.string().url().describe("Packages by Author URL"),
1810
VITE_SELECT_PKG_URL: z.string().url().describe("Select Single Package URL"),
19-
VITE_TA_TEST_PKG_URL: z
20-
.string()
21-
.url()
22-
.describe("TypeAhead Test Packages URL"),
23-
VITE_TA_PKGS_URL: z.string().url().describe("TypeAhead Packages by Task URL"),
2411
VITE_OVERVIEW_PKGS_URL: z
2512
.string()
2613
.url()

web/app/licenses.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.
Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,45 @@
11
import { RiExternalLinkLine } from "@remixicon/react";
22
import { ExternalLink } from "./external-link";
3+
import { ReactNode } from "react";
4+
import clsx from "clsx";
5+
6+
export function ProvidedByLabel(props: {
7+
headline?: string;
8+
source: string;
9+
sourceUrl: string;
10+
icon?: ReactNode;
11+
className?: string;
12+
}) {
13+
const {
14+
headline = "Data provided by",
15+
source,
16+
sourceUrl,
17+
icon,
18+
className,
19+
} = props;
320

4-
export function DataProvidedByCRANLabel() {
521
return (
6-
<p className="text-gray-dim mt-16 text-right text-xs">
7-
Data provided by{" "}
22+
<p className={clsx("text-gray-dim mt-16 text-right text-xs", className)}>
23+
{headline}{" "}
824
<ExternalLink
9-
href="https://github.com/r-hub/cranlogs.app"
25+
href={sourceUrl}
1026
className="inline-flex items-center gap-1 underline underline-offset-4"
1127
>
12-
cranlogs
13-
<RiExternalLinkLine size={10} className="text-gray-dim" />
28+
{source}
29+
{icon || <RiExternalLinkLine size={10} className="text-gray-dim" />}
1430
</ExternalLink>
1531
</p>
1632
);
1733
}
34+
35+
/**
 * Attribution label that credits CRAN as the data source.
 *
 * Renders `ProvidedByLabel` with a fixed source name and link; the optional
 * `className` is forwarded unchanged.
 */
export function DataProvidedByCRANLabel(props: { className?: string }) {
  return (
    <ProvidedByLabel
      className={props.className}
      source="CRAN"
      sourceUrl="https://cran.r-project.org/"
    />
  );
}

web/app/routes/_page.statistic._index.tsx

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,24 @@ export default function StatisticsOverviewPage() {
6666
>
6767
<ul className="grid grid-cols-2 gap-4 md:grid-cols-3 lg:grid-cols-4">
6868
<li>
69-
<Link prefetch="intent" to="/statistic/packages">
69+
<Link prefetch="intent" to="/statistic/packages/downloads">
7070
<InfoCard variant="bronze" icon="internal" className="min-h-60">
7171
<div className="space-y-2">
7272
<h3>Package downloads</h3>
7373
<p className="text-gray-dim">
74-
See what packages are trending on CRAN/E.
74+
See what packages are most downloaded from CRAN.
75+
</p>
76+
</div>
77+
</InfoCard>
78+
</Link>
79+
</li>
80+
<li>
81+
<Link prefetch="intent" to="/statistic/packages/trends">
82+
<InfoCard variant="bronze" icon="internal" className="min-h-60">
83+
<div className="space-y-2">
84+
<h3>Package trends</h3>
85+
<p className="text-gray-dim">
86+
See what packages are trending on CRAN.
7587
</p>
7688
</div>
7789
</InfoCard>

web/app/routes/_page.statistic.packages.tsx renamed to web/app/routes/_page.statistic.packages.downloads.tsx

Lines changed: 45 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,14 @@ import { Separator } from "../modules/separator";
1515
import { Tag } from "../modules/tag";
1616
import { ClientOnly } from "remix-utils/client-only";
1717
import { hoursToSeconds } from "date-fns";
18-
import { DataProvidedByCRANLabel } from "../modules/provided-by-label";
18+
import {
19+
DataProvidedByCRANLabel,
20+
ProvidedByLabel,
21+
} from "../modules/provided-by-label";
22+
import { AIPackageService } from "../ai/packages";
23+
import { PackageService } from "../data/package.service";
1924

20-
const anchors = composeAnchorItems(["Top Downloads", "Trending Packages"]);
25+
const anchors = composeAnchorItems(["Analysis", "Top Downloads"]);
2126

2227
export async function loader(params: LoaderFunctionArgs) {
2328
const { request } = params;
@@ -36,18 +41,36 @@ export async function loader(params: LoaderFunctionArgs) {
3641
topDownloadedCount = topDownloadedCountRes.data;
3742
}
3843

39-
const [topDownloads, trends] = await Promise.all([
44+
const [topDownloads] = await Promise.all([
4045
PackageInsightService.getTopDownloadedPackages(
4146
topDownloadedRange,
4247
topDownloadedCount,
4348
),
44-
PackageInsightService.getTrendingPackages(),
4549
]);
4650

51+
const packageSlugs = topDownloads.downloads.map((d) => d.package);
52+
const packageDetails = await Promise.allSettled(
53+
packageSlugs.map((slug) => PackageService.getPackage(slug)),
54+
).then((res) => {
55+
return res.map((r) => (r.status === "fulfilled" ? r.value : undefined));
56+
});
57+
const context = packageDetails
58+
.filter(Boolean)
59+
.map((pkg) =>
60+
[
61+
`# ${pkg?.name} (${topDownloads.downloads.find((d) => d.package === pkg?.name)?.downloads} downloads)`,
62+
pkg?.title,
63+
pkg?.description,
64+
].join("\n"),
65+
)
66+
.join("\n\n");
67+
68+
const summary = await AIPackageService.generateTopDownloadsSummary(context);
69+
4770
return json(
4871
{
4972
topDownloads: topDownloads || [],
50-
trends: trends || [],
73+
summary,
5174
},
5275
{
5376
headers: {
@@ -58,7 +81,7 @@ export async function loader(params: LoaderFunctionArgs) {
5881
}
5982

6083
export default function StatisticPackagesPage() {
61-
const { topDownloads, trends } = useLoaderData<typeof loader>();
84+
const { topDownloads, summary } = useLoaderData<typeof loader>();
6285

6386
const nrFormatter = Intl.NumberFormat();
6487

@@ -80,6 +103,22 @@ export default function StatisticPackagesPage() {
80103
</Anchors>
81104

82105
<PageContent>
106+
<PageContentSection headline="Analysis" fragment="analysis">
107+
<div
108+
className="max-w-prose leading-relaxed [&>p]:mt-3"
109+
dangerouslySetInnerHTML={{ __html: summary }}
110+
/>
111+
<div>
112+
<DataProvidedByCRANLabel />
113+
<ProvidedByLabel
114+
headline="Summary generated by"
115+
source="Google Gemini Flash 1.5"
116+
sourceUrl="https://deepmind.google/technologies/gemini/flash/"
117+
className="mt-3"
118+
/>
119+
</div>
120+
</PageContentSection>
121+
83122
<PageContentSection headline="Top downloads" fragment="top-downloads">
84123
<p>
85124
The top downloads are the packages that were downloaded the most
@@ -110,35 +149,6 @@ export default function StatisticPackagesPage() {
110149
</ul>
111150
<DataProvidedByCRANLabel />
112151
</PageContentSection>
113-
114-
<Separator />
115-
116-
<PageContentSection
117-
headline="Trending packages"
118-
fragment="trending-packages"
119-
>
120-
<p>
121-
Trending packages are the ones that were downloaded at least 1000
122-
times during last week, and that substantially increased their
123-
download counts, compared to the average weekly downloads in the
124-
previous 24 weeks.
125-
</p>
126-
<ul className="grid grid-cols-2 gap-4 md:grid-cols-3 lg:grid-cols-4">
127-
{trends.map(({ package: name, increase }) => (
128-
<li key={name}>
129-
<Link to={`/package/${encodeURIComponent(name)}`}>
130-
<InfoCard variant="sand" icon="external">
131-
<div className="space-y-2">
132-
<h3 className="font-mono">{name}</h3>
133-
<p className="text-gray-dim">{increase}</p>
134-
</div>
135-
</InfoCard>
136-
</Link>
137-
</li>
138-
))}
139-
</ul>
140-
<DataProvidedByCRANLabel />
141-
</PageContentSection>
142152
</PageContent>
143153
<Separator />
144154
</>

0 commit comments

Comments
 (0)