Skip to content

Commit

Permalink
[rubygems/rubygems] Reorganize and refactor CompactIndexClient
Browse files Browse the repository at this point in the history
  • Loading branch information
martinemde authored and matzbot committed May 25, 2024
1 parent 0bae2f0 commit 6b0afbb
Show file tree
Hide file tree
Showing 7 changed files with 157 additions and 155 deletions.
1 change: 1 addition & 0 deletions lib/bundler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 42,7 @@ module Bundler
autoload :Checksum, File.expand_path("bundler/checksum", __dir__)
autoload :CLI, File.expand_path("bundler/cli", __dir__)
autoload :CIDetector, File.expand_path("bundler/ci_detector", __dir__)
autoload :CompactIndexClient, File.expand_path("bundler/compact_index_client", __dir__)
autoload :Definition, File.expand_path("bundler/definition", __dir__)
autoload :Dependency, File.expand_path("bundler/dependency", __dir__)
autoload :Deprecate, File.expand_path("bundler/deprecate", __dir__)
Expand Down
11 changes: 4 additions & 7 deletions lib/bundler/cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -767,13 767,10 @@ def warn_on_outdated_bundler

return unless SharedHelpers.md5_available?

latest = Fetcher::CompactIndex.
new(nil, Source::Rubygems::Remote.new(Gem::URI("https://rubygems.org")), nil, nil).
send(:compact_index_client).
instance_variable_get(:@cache).
dependencies("bundler").
map {|d| Gem::Version.new(d.first) }.
max
require_relative "vendored_uri"
remote = Source::Rubygems::Remote.new(Gem::URI("https://rubygems.org"))
cache_path = Bundler.user_cache.join("compact_index", remote.cache_slug)
latest = Bundler::CompactIndexClient.new(cache_path).latest_version("bundler")
return unless latest

current = Gem::Version.new(VERSION)
Expand Down
78 changes: 16 additions & 62 deletions lib/bundler/compact_index_client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 21,17 @@ class Error < StandardError; end

require_relative "compact_index_client/cache"
require_relative "compact_index_client/cache_file"
require_relative "compact_index_client/parser"
require_relative "compact_index_client/updater"

attr_reader :directory

def initialize(directory, fetcher)
@directory = Pathname.new(directory)
@updater = Updater.new(fetcher)
@cache = Cache.new(@directory)
@endpoints = Set.new
@info_checksums_by_name = {}
@parsed_checksums = false
@mutex = Thread::Mutex.new
def initialize(directory, fetcher = nil)
@cache = Cache.new(directory, fetcher)
@parser = Parser.new(@cache)
end

def execution_mode=(block)
Bundler::CompactIndexClient.debug { "execution_mode=" }
@endpoints = Set.new

@cache.reset!
@execution_mode = block
end

Expand All @@ -60,67 53,28 @@ def sequentially
end

def names
Bundler::CompactIndexClient.debug { "/names" }
update("names", @cache.names_path, @cache.names_etag_path)
@cache.names
Bundler::CompactIndexClient.debug { "names" }
@parser.names
end

def versions
Bundler::CompactIndexClient.debug { "/versions" }
update("versions", @cache.versions_path, @cache.versions_etag_path)
versions, @info_checksums_by_name = @cache.versions
versions
Bundler::CompactIndexClient.debug { "versions" }
@parser.versions
end

def dependencies(names)
Bundler::CompactIndexClient.debug { "dependencies(#{names})" }
execution_mode.call(names) do |name|
update_info(name)
@cache.dependencies(name).map {|d| d.unshift(name) }
end.flatten(1)
end

def update_and_parse_checksums!
Bundler::CompactIndexClient.debug { "update_and_parse_checksums!" }
return @info_checksums_by_name if @parsed_checksums
update("versions", @cache.versions_path, @cache.versions_etag_path)
@info_checksums_by_name = @cache.checksums
@parsed_checksums = true
end

private

def update(remote_path, local_path, local_etag_path)
Bundler::CompactIndexClient.debug { "update(#{local_path}, #{remote_path})" }
unless synchronize { @endpoints.add?(remote_path) }
Bundler::CompactIndexClient.debug { "already fetched #{remote_path}" }
return
end
@updater.update(url(remote_path), local_path, local_etag_path)
end

def update_info(name)
Bundler::CompactIndexClient.debug { "update_info(#{name})" }
path = @cache.info_path(name)
unless existing = @info_checksums_by_name[name]
Bundler::CompactIndexClient.debug { "skipping updating info for #{name} since it is missing from versions" }
return
end
checksum = SharedHelpers.checksum_for_file(path, :MD5)
if checksum == existing
Bundler::CompactIndexClient.debug { "skipping updating info for #{name} since the versions checksum matches the local checksum" }
return
end
Bundler::CompactIndexClient.debug { "updating info for #{name} since the versions checksum #{existing} != the local checksum #{checksum}" }
update("info/#{name}", path, @cache.info_etag_path(name))
execution_mode.call(names) {|name| @parser.info(name) }.flatten(1)
end

def url(path)
path
def latest_version(name)
Bundler::CompactIndexClient.debug { "latest_version(#{name})" }
@parser.info(name).map {|d| Gem::Version.new(d[1]) }.max
end

def synchronize
@mutex.synchronize { yield }
def available?
Bundler::CompactIndexClient.debug { "available?" }
@parser.available?
end
end
end
128 changes: 47 additions & 81 deletions lib/bundler/compact_index_client/cache.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,123 7,89 @@ class CompactIndexClient
class Cache
attr_reader :directory

def initialize(directory)
def initialize(directory, fetcher = nil)
@directory = Pathname.new(directory).expand_path
info_roots.each {|dir| mkdir(dir) }
mkdir(info_etag_root)
@updater = Updater.new(fetcher) if fetcher
@mutex = Thread::Mutex.new
@endpoints = Set.new

@info_root = mkdir("info")
@special_characters_info_root = mkdir("info-special-characters")
@info_etag_root = mkdir("info-etags")
end

def names
lines(names_path)
fetch("names", names_path, names_etag_path)
end

def names_path
directory.join("names")
def versions
fetch("versions", versions_path, versions_etag_path)
end

def names_etag_path
directory.join("names.etag")
end
def info(name, remote_checksum = nil)
path = info_path(name)

def versions
versions_by_name = Hash.new {|hash, key| hash[key] = [] }
info_checksums_by_name = {}

lines(versions_path).each do |line|
name, versions_string, info_checksum = line.split(" ", 3)
info_checksums_by_name[name] = info_checksum || ""
versions_string.split(",") do |version|
delete = version.delete_prefix!("-")
version = version.split("-", 2).unshift(name)
if delete
versions_by_name[name].delete(version)
else
versions_by_name[name] << version
end
end
if remote_checksum && remote_checksum != SharedHelpers.checksum_for_file(path, :MD5)
fetch("info/#{name}", path, info_etag_path(name))
else
Bundler::CompactIndexClient.debug { "update skipped info/#{name} (#{remote_checksum ? "versions index checksum is nil" : "versions index checksum matches local"})" }
read(path)
end

[versions_by_name, info_checksums_by_name]
end

def versions_path
directory.join("versions")
end

def versions_etag_path
directory.join("versions.etag")
def reset!
@mutex.synchronize { @endpoints.clear }
end

def checksums
lines(versions_path).each_with_object({}) do |line, checksums|
parse_version_checksum(line, checksums)
end
end
private

def dependencies(name)
lines(info_path(name)).map do |line|
parse_gem(line)
end
end
def names_path = directory.join("names")
def names_etag_path = directory.join("names.etag")
def versions_path = directory.join("versions")
def versions_etag_path = directory.join("versions.etag")

def info_path(name)
name = name.to_s
# TODO: converge this into the info_root by hashing all filenames like info_etag_path
if /[^a-z0-9_-]/.match?(name)
name = "-#{SharedHelpers.digest(:MD5).hexdigest(name).downcase}"
info_roots.last.join(name)
@special_characters_info_root.join(name)
else
info_roots.first.join(name)
@info_root.join(name)
end
end

def info_etag_path(name)
name = name.to_s
info_etag_root.join("#{name}-#{SharedHelpers.digest(:MD5).hexdigest(name).downcase}")
@info_etag_root.join("#{name}-#{SharedHelpers.digest(:MD5).hexdigest(name).downcase}")
end

private

def mkdir(dir)
SharedHelpers.filesystem_access(dir) do
FileUtils.mkdir_p(dir)
def mkdir(name)
directory.join(name).tap do |dir|
SharedHelpers.filesystem_access(dir) do
FileUtils.mkdir_p(dir)
end
end
end

def lines(path)
return [] unless path.file?
lines = SharedHelpers.filesystem_access(path, :read, &:read).split("\n")
header = lines.index("---")
header ? lines[header 1..-1] : lines
end

def parse_gem(line)
@dependency_parser ||= GemParser.new
@dependency_parser.parse(line)
end
def fetch(remote_path, path, etag_path)
if already_fetched?(remote_path)
Bundler::CompactIndexClient.debug { "already fetched #{remote_path}" }
else
Bundler::CompactIndexClient.debug { "fetching #{remote_path}" }
@updater&.update(remote_path, path, etag_path)
end

# This is mostly the same as `split(" ", 3)` but it avoids allocating extra objects.
# This method gets called at least once for every gem when parsing versions.
def parse_version_checksum(line, checksums)
line.freeze # allows slicing into the string to not allocate a copy of the line
name_end = line.index(" ")
checksum_start = line.index(" ", name_end 1) 1
checksum_end = line.size - checksum_start
# freeze name since it is used as a hash key
# pre-freezing means a frozen copy isn't created
name = line[0, name_end].freeze
checksum = line[checksum_start, checksum_end]
checksums[name] = checksum
read(path)
end

def info_roots
[
directory.join("info"),
directory.join("info-special-characters"),
]
def already_fetched?(remote_path)
@mutex.synchronize { !@endpoints.add?(remote_path) }
end

def info_etag_root
directory.join("info-etags")
def read(path)
return unless path.file?
SharedHelpers.filesystem_access(path, :read, &:read)
end
end
end
Expand Down
84 changes: 84 additions & 0 deletions lib/bundler/compact_index_client/parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 1,84 @@
# frozen_string_literal: true

module Bundler
class CompactIndexClient
class Parser
# `compact_index` - an object responding to #names, #versions, #info(name, checksum),
# returning the file contents as a string
def initialize(compact_index)
@compact_index = compact_index
@info_checksums = nil
@versions_by_name = nil
@available = nil
end

def names
lines(@compact_index.names)
end

def versions
@versions_by_name ||= Hash.new {|hash, key| hash[key] = [] }
@info_checksums = {}

lines(@compact_index.versions).each do |line|
name, versions_string, checksum = line.split(" ", 3)
@info_checksums[name] = checksum || ""
versions_string.split(",") do |version|
delete = version.delete_prefix!("-")
version = version.split("-", 2).unshift(name)
if delete
@versions_by_name[name].delete(version)
else
@versions_by_name[name] << version
end
end
end

@versions_by_name
end

def info(name)
data = @compact_index.info(name, info_checksums[name])
lines(data).map {|line| gem_parser.parse(line).unshift(name) }
end

def available?
return @available unless @available.nil?
@available = !info_checksums.empty?
end

private

def info_checksums
@info_checksums ||= lines(@compact_index.versions).each_with_object({}) do |line, checksums|
parse_version_checksum(line, checksums)
end
end

def lines(data)
return [] if data.nil? || data.empty?
lines = data.split("\n")
header = lines.index("---")
header ? lines[header 1..-1] : lines
end

def gem_parser
@gem_parser ||= GemParser.new
end

# This is mostly the same as `split(" ", 3)` but it avoids allocating extra objects.
# This method gets called at least once for every gem when parsing versions.
def parse_version_checksum(line, checksums)
line.freeze # allows slicing into the string to not allocate a copy of the line
name_end = line.index(" ")
checksum_start = line.index(" ", name_end 1) 1
checksum_end = line.size - checksum_start
# freeze name since it is used as a hash key
# pre-freezing means a frozen copy isn't created
name = line[0, name_end].freeze
checksum = line[checksum_start, checksum_end]
checksums[name] = checksum
end
end
end
end
Loading

0 comments on commit 6b0afbb

Please sign in to comment.