Top level class for building the gem repository index.
Create an indexer that will index the gems in directory.
# File lib/rubygems/indexer.rb, line 59
#
# Creates an indexer for the gem repository rooted at +directory+.
#
# +options+ may contain :build_legacy and :build_modern (both default to
# true) as well as :rss_title, :rss_host and :rss_gems_host. Every index is
# staged in a per-process directory under Dir.tmpdir; the matching paths
# below +directory+ are remembered for the modern specs indexes.
#
# Raises RuntimeError when String#to_xs is unavailable.
def initialize(directory, options = {})
  unless ''.respond_to? :to_xs
    raise "Gem::Indexer requires that the XML Builder library be installed:" \
          "\n\tgem install builder"
  end

  settings = { :build_legacy => true, :build_modern => true }.merge options

  @build_legacy = settings[:build_legacy]
  @build_modern = settings[:build_modern]

  @rss_title     = settings[:rss_title]
  @rss_host      = settings[:rss_host]
  @rss_gems_host = settings[:rss_gems_host]

  @dest_directory = directory
  @directory      = File.join Dir.tmpdir, "gem_generate_index_#{$$}"

  version      = Gem.marshal_version
  marshal_name = "Marshal.#{version}"
  specs_name            = "specs.#{version}"
  latest_specs_name     = "latest_specs.#{version}"
  prerelease_specs_name = "prerelease_specs.#{version}"

  # Legacy index files
  @master_index  = File.join @directory, 'yaml'
  @marshal_index = File.join @directory, marshal_name

  @quick_dir         = File.join @directory, 'quick'
  @quick_marshal_dir = File.join @quick_dir, marshal_name
  @quick_index       = File.join @quick_dir, 'index'
  @latest_index      = File.join @quick_dir, 'latest_index'

  # Modern index files, staged and installed locations
  @specs_index            = File.join @directory, specs_name
  @latest_specs_index     = File.join @directory, latest_specs_name
  @prerelease_specs_index = File.join @directory, prerelease_specs_name

  @dest_specs_index            = File.join @dest_directory, specs_name
  @dest_latest_specs_index     = File.join @dest_directory, latest_specs_name
  @dest_prerelease_specs_index = File.join @dest_directory, prerelease_specs_name

  @rss_index = File.join @directory, 'index.rss'

  @files = []
end
Abbreviate the spec for downloading. Abbreviated specs are only used for searching, downloading and related activities and do not need deployment specific information (e.g. list of files). So we abbreviate the spec, making it much smaller for quicker downloads.
# File lib/rubygems/indexer.rb, line 113
#
# Empties the list-valued fields of +spec+ that are not needed for
# searching or downloading (files, test files, rdoc options, extra rdoc
# files and the certificate chain), then returns the same +spec+ object.
def abbreviate(spec)
  [:files=, :test_files=, :rdoc_options=, :extra_rdoc_files=, :cert_chain=].each do |writer|
    # a fresh array per field, so the fields never share one object
    spec.send writer, []
  end

  spec
end
Builds the various indices.
# File lib/rubygems/indexer.rb, line 125
#
# Runs every index builder over +index+ and then compresses the results.
# Marshal gemspecs are always built because both modern and legacy RubyGems
# use them; the legacy and modern builders run only when enabled.
def build_indicies(index)
  builders = [:build_marshal_gemspecs]
  builders << :build_legacy_indicies if @build_legacy
  builders << :build_modern_indicies if @build_modern
  builders << :build_rss

  builders.each { |builder| send builder, index }

  compress_indicies
end
Builds indicies for RubyGems older than 1.2.x
# Builds the index files read by RubyGems older than 1.2.x from +index+:
#
# * one deflated YAML gemspec per released gem, written into @quick_dir as
#   "<original_name>.gemspec.rz"
# * the quick index (@quick_index): original names of the sorted index
# * the latest index (@latest_index): original names of the sorted latest specs
# * the Marshal master index (@marshal_index): the output of index.dump
# * the YAML master index (@master_index): every gemspec's YAML nested under
#   a "gems:" key
#
# Finally @quick_dir, @master_index, "#{@master_index}.Z", @marshal_index and
# "#{@marshal_index}.Z" are appended to @files.
#
# Both progress reporters are sized with index.size, while the first loop
# iterates index.released_gems.
#
# NOTE(review): this listing is a collapsed documentation extract. The
# "needed extra ' '" remark suggests that runs of spaces inside the string
# and regexp literals of the YAML master index section were collapsed --
# confirm against the real source before relying on those literals.
# File lib/rubygems/indexer.rb, line 138 138: def build_legacy_indicies(index) 139: progress = ui.progress_reporter index.size, 140: "Generating YAML quick index gemspecs for #{index.size} gems", 141: "Complete" 142: 143: Gem.time 'Generated YAML quick index gemspecs' do 144: index.released_gems.each do |original_name, spec| 145: spec_file_name = "#{original_name}.gemspec.rz" 146: yaml_name = File.join @quick_dir, spec_file_name 147: 148: yaml_zipped = Gem.deflate spec.to_yaml 149: open yaml_name, 'wb' do |io| io.write yaml_zipped end 150: 151: progress.updated original_name 152: end 153: 154: progress.done 155: end 156: 157: say "Generating quick index" 158: 159: Gem.time 'Generated quick index' do 160: open @quick_index, 'wb' do |io| 161: io.puts index.sort.map { |_, spec| spec.original_name } 162: end 163: end 164: 165: say "Generating latest index" 166: 167: Gem.time 'Generated latest index' do 168: open @latest_index, 'wb' do |io| 169: io.puts index.latest_specs.sort.map { |spec| spec.original_name } 170: end 171: end 172: 173: # Don't need prerelease legacy index 174: 175: say "Generating Marshal master index" 176: 177: Gem.time 'Generated Marshal master index' do 178: open @marshal_index, 'wb' do |io| 179: io.write index.dump 180: end 181: end 182: 183: progress = ui.progress_reporter index.size, 184: "Generating YAML master index for #{index.size} gems (this may take a while)", 185: "Complete" 186: 187: Gem.time 'Generated YAML master index' do 188: open @master_index, 'wb' do |io| 189: io.puts "--- !ruby/object:#{index.class}" 190: io.puts "gems:" 191: 192: gems = index.sort_by { |name, gemspec| gemspec.sort_obj } 193: gems.each do |original_name, gemspec| 194: yaml = gemspec.to_yaml.gsub(/^/, ' ') 195: yaml = yaml.sub(/\A ---/, '') # there's a needed extra ' ' here 196: io.print " #{original_name}:" 197: io.puts yaml 198: 199: progress.updated original_name 200: end 201: end 202: 203: progress.done 204: end 205: 206: @files << @quick_dir 207: @files << 
@master_index 208: @files << "#{@master_index}.Z" 209: @files << @marshal_index 210: @files << "#{@marshal_index}.Z" 211: end
Builds Marshal quick index gemspecs.
# File lib/rubygems/indexer.rb, line 216
#
# Writes one deflated, Marshal-dumped gemspec per entry of index.gems into
# @quick_marshal_dir as "<original_name>.gemspec.rz".
#
# Appends @quick_marshal_dir to @files and returns the list of paths that
# were written.
def build_marshal_gemspecs(index)
  reporter = ui.progress_reporter index.size,
                                  "Generating Marshal quick index gemspecs for #{index.size} gems",
                                  "Complete"

  written = []

  Gem.time 'Generated Marshal quick index gemspecs' do
    index.gems.each do |original_name, spec|
      target  = File.join @quick_marshal_dir, "#{original_name}.gemspec.rz"
      payload = Gem.deflate Marshal.dump(spec)

      open(target, 'wb') { |io| io.write payload }

      written << target
      reporter.updated original_name
    end

    reporter.done
  end

  @files << @quick_marshal_dir

  written
end
Build a single index for RubyGems 1.2 and newer
# File lib/rubygems/indexer.rb, line 247
#
# Writes a single Marshal index for RubyGems 1.2 and newer.
#
# index:: collection of specs; either an array of specs or a hash-like
#         collection yielding name/spec pairs
# file::  path of the index file to write
# name::  human readable index name used in progress messages
#
# Every spec becomes a [name, version, platform] triple. Specs whose
# original platform is not a String are reported with a warning and left
# out of the index.
def build_modern_index(index, file, name)
  say "Generating #{name} index"

  Gem.time "Generated #{name} index" do
    open(file, 'wb') do |io|
      specs = index.map do |*spec|
        # We have to splat here because latest_specs is an array,
        # while the others are hashes. See the TODO in source_index.rb
        spec = spec.flatten.last
        platform = spec.original_platform

        # win32-api-1.0.4-x86-mswin32-60
        unless String === platform
          alert_warning "Skipping invalid platform in gem: #{spec.full_name}"
          next
        end

        platform = Gem::Platform::RUBY if platform.empty?
        [spec.name, spec.version, platform]
      end

      # `next` above leaves nil in the mapped array; without dropping those,
      # compact_specs would turn each one into a bogus [nil, nil, nil] entry.
      specs = compact_specs(specs.compact)
      Marshal.dump(specs, io)
    end
  end
end
Builds indicies for RubyGems 1.2 and newer. Handles full, latest, prerelease
# File lib/rubygems/indexer.rb, line 277
#
# Builds the three indexes for RubyGems 1.2 and newer (released, latest and
# prerelease specs) from +index+, then records each index file and its
# ".gz" companion in @files.
def build_modern_indicies(index)
  targets = [
    [:released_specs,   @specs_index,            'specs'],
    [:latest_specs,     @latest_specs_index,     'latest specs'],
    [:prerelease_specs, @prerelease_specs_index, 'prerelease specs']
  ]

  targets.each do |reader, path, label|
    build_modern_index(index.send(reader).sort, path, label)
  end

  @files += targets.map { |_, path, _| [path, "#{path}.gz"] }.flatten
end
Builds an RSS feed of the gems released within the past two days, judged by each gem’s date.
# File lib/rubygems/indexer.rb, line 298
#
# Writes an RSS 2.0 feed to @rss_index listing the gems in +index+ whose date
# lies between Gem::Specification::TODAY minus 86400 seconds and TODAY
# (inclusive), newest first, and appends @rss_index to @files.
#
# Does nothing when @rss_host or @rss_gems_host is nil; a warning is printed
# in that case only when RubyGems is configured as really verbose.
#
# Gems whose file cannot be stat'ed (via spec.loaded_from) are left out.
#
# NOTE(review): the heredoc delimiters and the line layout of the XML
# templates were lost in the documentation extract this listing came from.
# They are reconstructed here with the text exactly as it appeared; confirm
# the templates (in particular the <pre> element, which may have been an
# escaped entity) against the real source.
def build_rss(index)
  if @rss_host.nil? or @rss_gems_host.nil?
    if Gem.configuration.really_verbose
      alert_warning "no --rss-host or --rss-gems-host, RSS generation disabled"
    end
    return
  end

  require 'cgi'
  require 'rubygems/text'

  extend Gem::Text

  Gem.time 'Generated rss' do
    open @rss_index, 'wb' do |io|
      rss_host = CGI.escapeHTML @rss_host
      rss_title = CGI.escapeHTML(@rss_title || 'gems')

      io.puts <<-HEADER
<?xml version="1.0"?>
<rss version="2.0">
  <channel>
    <title>#{rss_title}</title>
    <link>http://#{rss_host}</link>
    <description>Recently released gems from http://#{rss_host}</description>
    <generator>RubyGems v#{Gem::VERSION}</generator>
    <docs>http://cyber.law.harvard.edu/rss/rss.html</docs>
      HEADER

      today = Gem::Specification::TODAY
      yesterday = today - 86400

      # keep gems dated yesterday or later ...
      index = index.select do |_, spec|
        spec_date = spec.date

        case spec_date
        when Date
          Time.parse(spec_date.to_s) >= yesterday
        when Time
          spec_date >= yesterday
        end
      end

      # ... and not dated in the future
      index = index.select do |_, spec|
        spec_date = spec.date

        case spec_date
        when Date
          Time.parse(spec_date.to_s) <= today
        when Time
          spec_date <= today
        end
      end

      index.sort_by { |_, spec| [-spec.date.to_i, spec] }.each do |_, spec|
        gem_path = CGI.escapeHTML "http://#{@rss_gems_host}/gems/#{spec.file_name}"
        size = File.stat(spec.loaded_from).size rescue next

        description = spec.description || spec.summary || ''
        authors = Array spec.authors
        emails = Array spec.email

        # Pair every e-mail with its author. The block must always return the
        # entry: previously it returned nil when the author was missing or
        # empty, which silently dropped the e-mail from the feed.
        authors = emails.zip(authors).map do |email, author|
          if author and not author.empty?
            "#{email} (#{author})"
          else
            email
          end
        end.join ', '

        description = description.split(/\n\n+/).map do |chunk|
          format_text chunk, 78
        end

        description = description.join "\n\n"

        item = ''

        item << <<-ITEM
    <item>
      <title>#{CGI.escapeHTML spec.full_name}</title>
      <description><pre>#{CGI.escapeHTML description.chomp}</pre>
      </description>
      <author>#{CGI.escapeHTML authors}</author>
      <guid>#{CGI.escapeHTML spec.full_name}</guid>
      <enclosure url=\"#{gem_path}\" length=\"#{size}\" type=\"application/octet-stream\" />
      <pubDate>#{spec.date.rfc2822}</pubDate>
        ITEM

        item << <<-ITEM if spec.homepage
      <link>#{CGI.escapeHTML spec.homepage}</link>
        ITEM

        item << <<-ITEM
    </item>
        ITEM

        io.puts item
      end

      io.puts <<-FOOTER
  </channel>
</rss>
      FOOTER
    end
  end

  @files << @rss_index
end
Collect specifications from .gem files from the gem directory.
# File lib/rubygems/indexer.rb, line 408
#
# Loads the specification of every gem file in +gems+ (default:
# gem_file_list) and returns a Gem::SourceIndex holding the abbreviated and
# sanitized specs, keyed by original name.
#
# Zero-length gem files and files whose name does not match the spec's
# original name are skipped with a warning. Any other failure while
# processing a gem is reported through alert_error and the gem is skipped;
# a SignalException is reported and re-raised.
def collect_specs(gems = gem_file_list)
  source_index = Gem::SourceIndex.new

  reporter = ui.progress_reporter gems.size,
                                  "Loading #{gems.size} gems from #{@dest_directory}",
                                  "Loaded all gems"

  Gem.time 'loaded' do
    gems.each do |gem_path|
      if File.size(gem_path.to_s).zero?
        alert_warning "Skipping zero-length gem: #{gem_path}"
        next
      end

      begin
        spec = Gem::Format.from_file_by_path(gem_path).spec
        spec.loaded_from = gem_path

        if gem_path !~ /\/#{Regexp.escape spec.original_name}.*\.gem\z/
          expected_name = spec.full_name
          if spec.original_name != spec.full_name
            expected_name << " (#{spec.original_name})"
          end
          alert_warning "Skipping misnamed gem: #{gem_path} should be named #{expected_name}"
          next
        end

        abbreviate spec
        sanitize spec

        source_index.add_spec spec, spec.original_name

        reporter.updated spec.original_name
      rescue SignalException
        alert_error "Received signal, exiting"
        raise
      rescue Exception => e
        alert_error "Unable to process #{gem_path}\n#{e.message} (#{e.class})\n\t#{e.backtrace.join "\n\t"}"
      end
    end

    reporter.done
  end

  source_index
end
Compacts Marshal output for the specs index data source by using identical objects as much as possible.
# File lib/rubygems/indexer.rb, line 490
#
# Returns a copy of +specs+ (a list of [name, version, platform] triples) in
# which equal names, versions and platforms are all represented by the
# first object seen with that value, so Marshal can emit back-references
# instead of repeated data.
def compact_specs(specs)
  # one interning table per triple position: names, versions, platforms
  pools = [{}, {}, {}]

  specs.map do |(name, version, platform)|
    [name, version, platform].zip(pools).map do |value, pool|
      pool[value] = value unless pool.include? value
      pool[value]
    end
  end
end
Compresses the file filename, writing the deflated data to filename.extension.
# File lib/rubygems/indexer.rb, line 507
#
# Reads +filename+ in binary mode, deflates its contents and writes the
# result to "<filename>.<extension>". The source file is left in place.
def compress(filename, extension)
  deflated = Gem.deflate(Gem.read_binary(filename))

  open("#{filename}.#{extension}", 'wb') { |io| io.write deflated }
end
Compresses indicies on disk
# File lib/rubygems/indexer.rb, line 460
#
# Compresses the index files on disk. Legacy indexes are deflated and then
# verified against their source with #paranoid; modern indexes are gzipped.
def compress_indicies
  say "Compressing indicies"

  Gem.time 'Compressed indicies' do
    if @build_legacy
      legacy = [
        [@quick_index,   'rz'],
        [@latest_index,  'rz'],
        [@marshal_index, 'Z'],
        [@master_index,  'Z']
      ]

      legacy.each do |path, extension|
        compress path, extension
        paranoid path, extension
      end
    end

    if @build_modern
      [@specs_index, @latest_specs_index, @prerelease_specs_index].each do |path|
        gzip path
      end
    end
  end
end
List of gem file names to index.
# File lib/rubygems/indexer.rb, line 520
#
# Returns the paths of all "*.gem" files in the "gems" subdirectory of
# @dest_directory.
def gem_file_list
  pattern = File.join @dest_directory, "gems", "*.gem"

  Dir.glob pattern
end
Builds and installs indicies.
# File lib/rubygems/indexer.rb, line 527
#
# Builds every index in the temporary directory and installs the result
# into the destination directory. A SignalException raised along the way is
# swallowed; the temporary directory is removed in every case.
def generate_index
  begin
    make_temp_directories
    build_indicies collect_specs
    install_indicies
  rescue SignalException
    # swallowed on purpose: only the cleanup below still has to happen
  ensure
    FileUtils.rm_rf @directory
  end
end
Zlib::GzipWriter wrapper that gzips filename on disk.
# File lib/rubygems/indexer.rb, line 540
#
# Writes a gzipped copy of +filename+ to "<filename>.gz"; the source file is
# read in binary mode and left in place.
def gzip(filename)
  target = "#{filename}.gz"

  Zlib::GzipWriter.open(target) { |gz| gz.write Gem.read_binary(filename) }
end
Install generated indicies into the destination directory.
# File lib/rubygems/indexer.rb, line 549
#
# Moves every path recorded in @files from the temporary directory into
# @dest_directory, replacing whatever is already there.
#
# When the whole quick directory is being installed, the Marshal quick
# directory travels with it. When only the Marshal quick directory was
# generated, it is moved to its own place below the destination's existing
# quick directory.
def install_indicies
  verbose = Gem.configuration.really_verbose

  say "Moving index into production dir #{@dest_directory}" if verbose

  staged = @files.dup
  installs_quick_dir = staged.include? @quick_dir

  # already contained in @quick_dir, no separate move needed
  staged.delete @quick_marshal_dir if installs_quick_dir

  if !installs_quick_dir && staged.include?(@quick_marshal_dir)
    staged.delete @quick_marshal_dir

    relative = @quick_marshal_dir.sub @directory, ''
    target   = File.join @dest_directory, relative

    FileUtils.mkdir_p File.dirname(target), :verbose => verbose
    FileUtils.rm_rf target, :verbose => verbose
    FileUtils.mv @quick_marshal_dir, target, :verbose => verbose,
                 :force => true
  end

  staged.each do |path|
    relative = path.sub @directory, ''
    source   = File.join @directory, relative
    target   = File.join @dest_directory, relative

    FileUtils.rm_rf target, :verbose => verbose
    FileUtils.mv source, @dest_directory, :verbose => verbose,
                 :force => true
  end
end
Make directories for index generation
# File lib/rubygems/indexer.rb, line 587
#
# Recreates the temporary working directory from scratch: anything left at
# @directory is removed, the directory is created with mode 0700, and the
# Marshal quick directory is created below it.
def make_temp_directories
  FileUtils.rm_rf(@directory)

  FileUtils.mkdir_p(@directory, :mode => 0700)
  FileUtils.mkdir_p(@quick_marshal_dir)
end
Ensures that the contents of path and the inflated contents of path.extension are identical.
# File lib/rubygems/indexer.rb, line 596
#
# Verifies that the compressed companion of +path+ inflates back to exactly
# the contents of +path+.
#
# path::      the uncompressed file
# extension:: suffix of the compressed companion, "<path>.<extension>"
#
# Raises RuntimeError naming both files when the contents differ.
def paranoid(path, extension)
  compressed_path = "#{path}.#{extension}"

  data = Gem.read_binary path
  compressed_data = Gem.read_binary compressed_path

  unless data == Gem.inflate(compressed_data)
    # compressed_path used to be undefined here, so a mismatch surfaced as a
    # NameError instead of this message
    raise "Compressed file #{compressed_path} does not match uncompressed file #{path}"
  end
end
Sanitize the descriptive fields in the spec. Sometimes non-ASCII characters will garble the site index. Non-ASCII characters will be replaced by their XML entity equivalent.
# File lib/rubygems/indexer.rb, line 610
#
# Runs the descriptive fields of +spec+ (summary, description, post install
# message and every author) through #sanitize_string, storing the results
# back on +spec+. Returns the same +spec+.
def sanitize(spec)
  [:summary, :description, :post_install_message].each do |field|
    spec.send "#{field}=", sanitize_string(spec.send(field))
  end

  spec.authors = spec.authors.map { |author| sanitize_string(author) }

  spec
end
Sanitize a single string.
# File lib/rubygems/indexer.rb, line 622
#
# Returns the #to_xs form of +string+, or +string+ itself when it is nil or
# false.
def sanitize_string(string)
  return string unless string

  # HACK the #to_s is in here because RSpec has an Array of Arrays of
  # Strings for authors. Need a way to disallow bad values on gemspec
  # generation. (Probably won't happen.)
  string.to_s.to_xs
end
Performs an in-place update of the repository from newly added gems. Only works for modern indices, and sets #build_legacy to false when run.
# File lib/rubygems/indexer.rb, line 633
#
# Performs an in-place update of the repository from newly added gems. Only
# the modern indexes are updated; @build_legacy is set to false.
#
# A gem counts as new when its mtime is not older than the mtime of the
# installed specs index. When there are no new gems, 'No new gems' is
# printed and the interaction is terminated with status 0.
#
# Each installed file gets the mtime of the newest gem file, so the next run
# compares gem files against that time.
def update_index
  @build_legacy = false

  make_temp_directories

  specs_mtime = File.stat(@dest_specs_index).mtime
  newest_mtime = Time.at 0

  updated_gems = gem_file_list.select do |gem|
    gem_mtime = File.stat(gem).mtime
    newest_mtime = gem_mtime if gem_mtime > newest_mtime
    gem_mtime >= specs_mtime
  end

  if updated_gems.empty?
    say 'No new gems'
    terminate_interaction 0
  end

  index = collect_specs updated_gems

  files = build_marshal_gemspecs index

  Gem.time 'Updated indexes' do
    update_specs_index index.released_gems, @dest_specs_index, @specs_index
    update_specs_index index.released_gems, @dest_latest_specs_index, @latest_specs_index
    update_specs_index(index.prerelease_gems, @dest_prerelease_specs_index,
                       @prerelease_specs_index)
  end

  compress_indicies

  verbose = Gem.configuration.really_verbose

  say "Updating production dir #{@dest_directory}" if verbose

  files << @specs_index
  files << "#{@specs_index}.gz"
  files << @latest_specs_index
  files << "#{@latest_specs_index}.gz"
  files << @prerelease_specs_index
  files << "#{@prerelease_specs_index}.gz"

  files = files.map do |path|
    path.sub @directory, ''
  end

  files.each do |file|
    src_name = File.join @directory, file
    # Address the installed file itself. Using the containing directory here
    # made File.utime below stamp the directory, leaving the index file with
    # the time of the move instead of the newest gem's mtime.
    dst_name = File.join @dest_directory, file

    FileUtils.mv src_name, dst_name, :verbose => verbose,
                 :force => true

    File.utime newest_mtime, newest_mtime, dst_name
  end
end
Combines specs in index and source then writes out a new copy to dest. For a latest index, does not ensure the new file is minimal.
# File lib/rubygems/indexer.rb, line 695
#
# Merges the specs of +index+ into the Marshal index stored in +source+ and
# writes the combined, de-duplicated and sorted list to +dest+.
#
# A nil or empty original platform is recorded as Gem::Platform::RUBY. For a
# latest index the result is not guaranteed to be minimal.
def update_specs_index(index, source, dest)
  entries = Marshal.load Gem.read_binary(source)

  additions = index.map do |_, spec|
    platform = spec.original_platform
    platform = Gem::Platform::RUBY if platform.nil? or platform.empty?

    [spec.name, spec.version, platform]
  end

  entries.concat additions

  merged = compact_specs entries.uniq.sort

  open(dest, 'wb') { |io| Marshal.dump merged, io }
end
Disabled; run with --debug to generate this.
Generated with the Darkfish Rdoc Generator 1.1.6.