11package amazonlinux
22
33import (
4+ "context"
5+ "io"
6+ "net/http"
47 "net/url"
8+ "strings"
59
610 "github.com/maxgio92/krawler/pkg/distro"
711 "github.com/maxgio92/krawler/pkg/output"
812 p "github.com/maxgio92/krawler/pkg/packages"
13+ "github.com/maxgio92/krawler/pkg/packages/rpm"
914 "github.com/maxgio92/krawler/pkg/scrape"
1015)
1116
@@ -24,7 +29,7 @@ func (a *AmazonLinux) ConfigureCommon(def distro.Config, config distro.Config) e
2429 return nil
2530}
2631
27- // Returns the list of version-specific mirror URLs.
32+ // BuildMirrorURLs returns the list of version-specific mirror URLs.
2833func (a * AmazonLinux ) BuildMirrorURLs (mirrors []p.Mirror , versions []distro.Version ) ([]* url.URL , error ) {
2934 versions , err := a .buildVersions (mirrors , versions )
3035 if err != nil {
@@ -51,8 +56,8 @@ func (a *AmazonLinux) BuildMirrorURLs(mirrors []p.Mirror, versions []distro.Vers
5156 return nil , distro .ErrNoDistroVersionSpecified
5257}
5358
54- // Returns the list of repositories URLs.
55- func BuildRepositoriesURLs (roots []* url.URL , repositories []p.Repository ) ([]* url.URL , error ) {
59+ // BuildRepositoryURLs returns the list of repositories URLs.
60+ func BuildRepositoryURLs (roots []* url.URL , repositories []p.Repository ) ([]* url.URL , error ) {
5661 var urls []* url.URL
5762
5863 for _ , root := range roots {
@@ -74,7 +79,7 @@ func BuildRepositoriesURLs(roots []*url.URL, repositories []p.Repository) ([]*ur
7479 return urls , nil
7580}
7681
77- // Returns a list of distro versions, considering the user-provided configuration,
82+ // buildVersions returns a list of distro versions, considering the user-provided configuration,
7883// and if not, the ones available on configured mirrors.
7984func (a * AmazonLinux ) buildVersions (mirrors []p.Mirror , staticVersions []distro.Version ) ([]distro.Version , error ) {
8085 if staticVersions != nil {
@@ -91,7 +96,7 @@ func (a *AmazonLinux) buildVersions(mirrors []p.Mirror, staticVersions []distro.
9196 return dynamicVersions , nil
9297}
9398
94- // Returns the list of the current available distro versions, by scraping
99+ // crawlVersions returns the list of the current available distro versions, by scraping
95100// the specified mirrors, dynamically.
96101func (a * AmazonLinux ) crawlVersions (mirrors []p.Mirror ) ([]distro.Version , error ) {
97102 versions := []distro.Version {}
@@ -123,3 +128,108 @@ func (a *AmazonLinux) crawlVersions(mirrors []p.Mirror) ([]distro.Version, error
123128
124129 return versions , nil
125130}
131+
132+ // SearchPackages scrapes each mirror, for each distro version, for each repository,
133+ // for each architecture, and returns slice of Package and optionally an error.
134+ func (a * AmazonLinux ) SearchPackages (options p.SearchOptions ) ([]p.Package , error ) {
135+ a .Config .Output .Logger = options .Log ()
136+
137+ // Build distribution version-specific mirror root URLs.
138+ perVersionMirrorURLs , err := a .BuildMirrorURLs (a .Config .Mirrors , a .Config .Versions )
139+ if err != nil {
140+ return nil , err
141+ }
142+
143+ // Build available repository URLs based on provided configuration,
144+ // for each distribution version.
145+ repositoriesURLrefs , err := BuildRepositoryURLs (perVersionMirrorURLs , a .Config .Repositories )
146+ if err != nil {
147+ return nil , err
148+ }
149+
150+ // Dereference repository URLs.
151+ repositoryURLs , err := a .dereferenceRepositoryURLs (repositoriesURLrefs , a .Config .Archs )
152+ if err != nil {
153+ return nil , err
154+ }
155+
156+ // Get RPM packages from each repository.
157+ rss := []string {}
158+ for _ , ru := range repositoryURLs {
159+ rss = append (rss , ru .String ())
160+ }
161+
162+ searchOptions := rpm .NewSearchOptions (& options , a .Config .Archs , rss )
163+ rpmPackages , err := rpm .SearchPackages (searchOptions )
164+ if err != nil {
165+ return nil , err
166+ }
167+
168+ return rpmPackages , nil
169+ }
170+
171+ func (a * AmazonLinux ) dereferenceRepositoryURLs (repoURLs []* url.URL , archs []p.Architecture ) ([]* url.URL , error ) {
172+ var urls []* url.URL
173+
174+ for _ , ar := range archs {
175+ for _ , v := range repoURLs {
176+ r , err := a .dereferenceRepositoryURL (v , ar )
177+ if err != nil {
178+ return nil , err
179+ }
180+
181+ if r != nil {
182+ urls = append (urls , r )
183+ }
184+ }
185+ }
186+
187+ return urls , nil
188+ }
189+
190+ func (a * AmazonLinux ) dereferenceRepositoryURL (src * url.URL , arch p.Architecture ) (* url.URL , error ) {
191+ var dest * url.URL
192+
193+ mirrorListURL , err := url .JoinPath (src .String (), string (arch ), "mirror.list" )
194+ if err != nil {
195+ return nil , err
196+ }
197+
198+ req , err := http .NewRequestWithContext (context .Background (), http .MethodGet , mirrorListURL , nil )
199+ if err != nil {
200+ return nil , err
201+ }
202+
203+ resp , err := http .DefaultClient .Do (req )
204+ if err != nil {
205+ return nil , err
206+ }
207+ defer resp .Body .Close ()
208+
209+ if resp .StatusCode != http .StatusOK {
210+ a .Config .Output .Logger .Error ("Amazon Linux v2023 repository URL not valid to be dereferenced" )
211+ //nolint:nilnil
212+ return nil , nil
213+ }
214+
215+ if resp .Body == nil {
216+ a .Config .Output .Logger .Error ("empty response from Amazon Linux v2023 repository reference URL" )
217+ //nolint:nilnil
218+ return nil , nil
219+ }
220+
221+ b , err := io .ReadAll (resp .Body )
222+ if err != nil {
223+ return nil , err
224+ }
225+
226+ // Get first repository URL available, no matter what the geolocation.
227+ s := strings .Split (string (b ), "\n " )[0 ]
228+
229+ dest , err = url .Parse (s )
230+ if err != nil {
231+ return nil , err
232+ }
233+
234+ return dest , nil
235+ }
0 commit comments