theme: condensed-night-purple
前言
前段时间在做项目加载优化时用到了splitChunks自动拆包,后来了解了一下原理,写下了此文。
Modules和Chunks
Modules简单来理解就是我们写的功能模块,不管是CommonJS还是ESM都算是一个Module,而Chunks则是webpack根据我们的规则/默认规则打包处理之后生成的产物,比如下图:
那么对于日常开发来说,如果我们要进行拆包,我们的配置规则大多是同一功能的模块会打进同一个chunk,或者说共同依赖会打包成同一个chunk,以此来避免同一段代码多次被引入业务代码进行打包
SplitChunks如何使用
关于拆包的具体使用,这里贴一个官网的例子简单说说,关于其他配置可参考webpack官网
splitChunks: {
chunks: "async", //chunks有三个值,initial表示直接引入的模块,async表示按需引入的模块,all则表示all in
minSize: 30000, //最小包体积,这里的单位是byte,超过这个大小的包会被splitChunks优化
minChunks: 1, //模块的最小引用次数,如果引用次数低于这个值,将不会被优化
maxAsyncRequests: 5, //设置async chunks的最大并行请求数
maxInitialRequests: 3, //设置initial chunks的最大并行请求数
automaticNameDelimiter: '~', //产出chunks的文件名分割符
name: true, //true:根据提取chunk的名字自动生成,false:根据缓存组IdHint生成,string:生成文件名即为这个string
cacheGroups: { //缓存组,自定义拆包规则在此定义
vendors: { //默认配置,node_modules的chunk
test: /[\\/]node_modules[\\/]/,
priority: -10
},
default: { //业务代码的chunk
minChunks: 2,
priority: -20,
reuseExistingChunk: true //复用已存在的chunks
}
}
}
而这一套默认的配置,则是在webpack里面默认定义的,我们可以在WebpackOptionsDefaulter.js里面找到它(当然还有别的配置项):
this.set("optimization.splitChunks", {});
this.set("optimization.splitChunks.hidePathInfo", "make", options => {
return isProductionLikeMode(options);
});
this.set("optimization.splitChunks.chunks", "async");
this.set("optimization.splitChunks.minSize", "make", options => {
return isProductionLikeMode(options) ? 30000 : 10000;//默认配置的minsize根据环境配置是不同的,生产环境的最小体积为30000byte,这里没有找到这样设置的理由,估计是因为开发环境的打包速度更为重要吧
});
this.set("optimization.splitChunks.minChunks", 1);
this.set("optimization.splitChunks.maxAsyncRequests", "make", options => {
return isProductionLikeMode(options) ? 5 : Infinity;
});
this.set("optimization.splitChunks.automaticNameDelimiter", "~");
this.set("optimization.splitChunks.automaticNameMaxLength", 109);
this.set("optimization.splitChunks.maxInitialRequests", "make", options => {
return isProductionLikeMode(options) ? 3 : Infinity;
});
this.set("optimization.splitChunks.name", true);
this.set("optimization.splitChunks.cacheGroups", {});
this.set("optimization.splitChunks.cacheGroups.default", {//这里也就是我们看到的默认将业务代码和node_modules单独拆分的初始化代码
automaticNamePrefix: "",
reuseExistingChunk: true,
minChunks: 2,
priority: -20
});
this.set("optimization.splitChunks.cacheGroups.vendors", {
automaticNamePrefix: "vendors",
test: /[\\/]node_modules[\\/]/,
priority: -10
});
接下来举个实际使用例子吧:
splitChunks: {
chunks: 'all',
automaticNameDelimiter: '.',
name: true,
minSize: 30000,
maxSize: 0,
minChunks: 1,
maxAsyncRequests: 10,
maxInitialRequests: 6,
cacheGroups: {
antdesigns: {
name: 'antdesigns',
test: /[\\/]node_modules[\\/](@antd|antd|@ant-design)[\\/]/,
priority: 10
},
reactfileviewer: {
name: 'reactfileviewer',
test: /[\\/]node_modules[\\/](react-file-viewer)[\\/]/,
priority: 10
},
pdfmake: {
name: 'pdfmake',
test: /[\\/]node_modules[\\/](pdfmake)[\\/]/,
priority: 10
},
bizcharts: {
name: 'bizcharts',
test: /[\\/]node_modules[\\/](bizcharts)[\\/]/,
priority: 10
},
commons: {
name: 'commons',
test: /[\\/]src[\\/]/,
priority: 1
},
vendors: {
name: 'vendors',
test: /[\\/]node_modules[\\/]/,
priority: -1
},
}
}
这里值得注意的是,在我们的配置里面,任何一项不满足都不会进入该项的拆包逻辑
SplitChunks原理
接下来我们结合源码看看SplitChunks的运行原理,SplitChunks位置在webpack/lib/optimize/SplitChunksPlugin.js
进入源码我们首先看到SplitChunks的class定义:
module.exports = class SplitChunksPlugin {
constructor(options = {}) {
const defaultSizeTypes = options.defaultSizeTypes || [
"javascript",
"unknown"
];
const fallbackCacheGroup = options.fallbackCacheGroup || {};
const minSize = normalizeSizes(options.minSize, defaultSizeTypes);
const minSizeReduction = normalizeSizes(
options.minSizeReduction,
defaultSizeTypes
);
const maxSize = normalizeSizes(options.maxSize, defaultSizeTypes);
/** @type {SplitChunksOptions} */
//options的初始化逻辑,概括为我们的配置优先,如果没有则采用默认规则里的相关项
this.options = {
chunksFilter: normalizeChunksFilter(options.chunks || "all"),
defaultSizeTypes,
minSize,
minSizeReduction,
minRemainingSize: mergeSizes(
normalizeSizes(options.minRemainingSize, defaultSizeTypes),
minSize
),
enforceSizeThreshold: normalizeSizes(
options.enforceSizeThreshold,
defaultSizeTypes
),
maxAsyncSize: mergeSizes(
normalizeSizes(options.maxAsyncSize, defaultSizeTypes),
maxSize
),
maxInitialSize: mergeSizes(
normalizeSizes(options.maxInitialSize, defaultSizeTypes),
maxSize
),
minChunks: options.minChunks || 1,
maxAsyncRequests: options.maxAsyncRequests || 1,
maxInitialRequests: options.maxInitialRequests || 1,
hidePathInfo: options.hidePathInfo || false,
filename: options.filename || undefined,
getCacheGroups: normalizeCacheGroups(
options.cacheGroups,
defaultSizeTypes
),
getName: options.name ? normalizeName(options.name) : defaultGetName,//这里就是我们提到的如果name为true则使用chunks配置的name
automaticNameDelimiter: options.automaticNameDelimiter,
usedExports: options.usedExports,
fallbackCacheGroup: {
chunksFilter: normalizeChunksFilter(
fallbackCacheGroup.chunks || options.chunks || "all"
),
minSize: mergeSizes(
normalizeSizes(fallbackCacheGroup.minSize, defaultSizeTypes),
minSize
),
maxAsyncSize: mergeSizes(
normalizeSizes(fallbackCacheGroup.maxAsyncSize, defaultSizeTypes),
normalizeSizes(fallbackCacheGroup.maxSize, defaultSizeTypes),
normalizeSizes(options.maxAsyncSize, defaultSizeTypes),
normalizeSizes(options.maxSize, defaultSizeTypes)
),
maxInitialSize: mergeSizes(
normalizeSizes(fallbackCacheGroup.maxInitialSize, defaultSizeTypes),
normalizeSizes(fallbackCacheGroup.maxSize, defaultSizeTypes),
normalizeSizes(options.maxInitialSize, defaultSizeTypes),
normalizeSizes(options.maxSize, defaultSizeTypes)
),
automaticNameDelimiter:
fallbackCacheGroup.automaticNameDelimiter ||
options.automaticNameDelimiter ||
"~"
}
};
/** @type {WeakMap<CacheGroupSource, CacheGroup>} */
this._cacheGroupCache = new WeakMap();//cacheGroup的数据结构是WeakMap,这样做的目的是缓存每一个CacheGroup后续使用
}
准备阶段
先说结论:准备阶段最重要的是chunksInfoMap(存储代码分割信息,可以理解为处理我们配置的一些规则,比如chunk的size/count,这是为后续处理minSize/minChunks做准备的数据,chunksInfoMap会一直更新,最终加入results返回,而results就是我们拆包之后能看到的chunks)和addModuleToChunksInfoMap(将代码分割信息加入chunksInfoMap),当然,在这两个方法中还调用了一些关键的方法,比如处理chunksSet、处理chunksKey等
接下来就到了SplitChunks最核心的一个方法了
/**
* Apply the plugin
* @param {Compiler} compiler the compiler instance
* @returns {void}
*/
apply(compiler) {//compiler是Compiler的一个实例,里面包含了webpack的所有配置,loaders、plugins等,在webpack启动的时候创建这个对象,全局唯一
const cachedMakePathsRelative = makePathsRelative.bindContextCache(
compiler.context,
compiler.root
);
...
}
接下来我们将这个方法拆开来看
chunksKey设置
在这里主要对chunks在映射中的key进行处理,用于后续优化
compiler.hooks.thisCompilation.tap("SplitChunksPlugin", compilation => {
const logger = compilation.getLogger("webpack.SplitChunksPlugin");
let alreadyOptimized = false;//该标志表示是否已执行过优化,当收到新的模块时(下方unseal钩子)会被重置为false
compilation.hooks.unseal.tap("SplitChunksPlugin", () => {
alreadyOptimized = false;
});
compilation.hooks.optimizeChunks.tap(
{
name: "SplitChunksPlugin",
stage: STAGE_ADVANCED
},
chunks => {//所有的拆包过程都在这个回调中执行
if (alreadyOptimized) return;
alreadyOptimized = true;
logger.time("prepare");
const chunkGraph = compilation.chunkGraph;
const moduleGraph = compilation.moduleGraph;
// Give each selected chunk an index (to create strings from chunks)
/** @type {Map<Chunk, bigint>} */
const chunkIndexMap = new Map();//创建一个map存chunks的index
const ZERO = BigInt("0");
const ONE = BigInt("1");
const START = ONE << BigInt("31");
let index = START;
for (const chunk of chunks) {//给每一个chunk一个初始index
chunkIndexMap.set(
chunk,
index | BigInt((Math.random() * 0x7fffffff) | 0)
);
index = index << ONE;
}
//关于这个index,4.x版本的方式是下面这样,没有看出现在这样方式的奥义,个人理解只是为了生成一个随机index
/*
* for (const chunk of chunks) {
* indexMap.set(chunk, index++);
* }
*/
/**
* @param {Iterable<Chunk>} chunks list of chunks
* @returns {bigint | Chunk} key of the chunks
*/
const getKey = chunks => {
//获取chunk的key
const iterator = chunks[Symbol.iterator]();
let result = iterator.next();
if (result.done) return ZERO;
const first = result.value;
result = iterator.next();
if (result.done) return first;
let key =
chunkIndexMap.get(first) | chunkIndexMap.get(result.value);
while (!(result = iterator.next()).done) {
const raw = chunkIndexMap.get(result.value);
key = key ^ raw;
}
return key;
};
const keyToString = key => {
if (typeof key === "bigint") return key.toString(16);
return chunkIndexMap.get(key).toString(16);
};
const getChunkSetsInGraph = memoize(() => {
/** @type {Map<bigint, Set<Chunk>>} */
// 这里将该key值和这些chunks建立映射关系,存在chunkSetsInGraph中,便于之后通过key值取出这些chunks集合,进行优化。
const chunkSetsInGraph = new Map();
/** @type {Set<Chunk>} */
const singleChunkSets = new Set();
//这里的重点是提取公共的module,这样就建立起了公共module和chunks(module.chunksIterable)的映射,chunks集合共用一个key
//这样就知道每个module在哪些chunk中重复了
for (const module of compilation.modules) {
const chunks = chunkGraph.getModuleChunksIterable(module);
const chunksKey = getKey(chunks);
if (typeof chunksKey === "bigint") {
if (!chunkSetsInGraph.has(chunksKey)) {
chunkSetsInGraph.set(chunksKey, new Set(chunks));
}
} else {
//不存在chunkSetsInGraph中的chunk会走singleChunks处理
singleChunkSets.add(chunksKey);
}
}
return { chunkSetsInGraph, singleChunkSets };
});
处理引用次数
这里需要注意的是,module和chunksSet是一对一的关系,而这里的count和chunksSet是一对多的关系,这里的处理是为了后续对minChunks过滤做准备
/**
 * Bucket chunk sets by their cardinality.
 * The resulting map lets later code (createGetCombinations) restrict
 * subset searches to sets strictly smaller than a candidate set.
 * @param {Iterable<Set<Chunk>>} chunkSets chunk sets to classify
 * @returns {Map<number, Array<Set<Chunk>>>} size -> all sets of that size
 */
const groupChunkSetsByCount = chunkSets => {
  /** @type {Map<number, Array<Set<Chunk>>>} */
  const bySize = new Map();
  for (const set of chunkSets) {
    // The set size is how many chunks contain the shared module.
    const size = set.size;
    const bucket = bySize.get(size);
    if (bucket === undefined) {
      bySize.set(size, [set]);
    } else {
      bucket.push(set);
    }
  }
  return bySize;
};
下面这段代码就是具体处理合并同key的chunk并处理子集的逻辑:
// Create a list of possible combinations
// Create a list of possible combinations
// For a given chunk-set key, build the list of candidate chunk selections
// a module could be split from: the exact set, every smaller set that is
// a subset of it, and every single chunk it contains. Results are memoized
// per key in combinationsCache.
const createGetCombinations = (
  chunkSets,
  singleChunkSets,
  chunkSetsByCount
) => {
  /** @type {Map<bigint | Chunk, (Set<Chunk> | Chunk)[]>} */
  const combinationsCache = new Map();
  return key => {
    const cacheEntry = combinationsCache.get(key);
    if (cacheEntry !== undefined) return cacheEntry;
    if (key instanceof Chunk) {
      // A single chunk has exactly one combination: itself.
      const result = [key];
      combinationsCache.set(key, result);
      return result;
    }
    const chunksSet = chunkSets.get(key);
    /** @type {(Set<Chunk> | Chunk)[]} */
    const array = [chunksSet];
    for (const [count, setArray] of chunkSetsByCount) {
      // Only strictly smaller sets can be proper subsets of chunksSet;
      // each subset found is an additional candidate combination.
      if (count < chunksSet.size) {
        for (const set of setArray) {
          if (isSubset(chunksSet, set)) {
            array.push(set);
          }
        }
      }
    }
    // Single chunks contained in the set count as combinations too.
    for (const chunk of singleChunkSets) {
      if (chunksSet.has(chunk)) {
        array.push(chunk);
      }
    }
    combinationsCache.set(key, array);
    return array;
  };
};
拆分缓存组
接下来就是最关键的准备工作:将每一项chunk分成独立的缓存组
/**
* @param {CacheGroup} cacheGroup the current cache group
* @param {number} cacheGroupIndex the index of the cache group of ordering
* @param {Chunk[]} selectedChunks chunks selected for this module
* @param {bigint | Chunk} selectedChunksKey a key of selectedChunks
* @param {Module} module the current module
* @returns {void}
*/
/**
 * Record that `module` could be extracted from `selectedChunks` for the
 * given cache group, updating the per-(group, name/key) entry in
 * chunksInfoMap (module set, accumulated sizes per source type, and the
 * set of affected chunks).
 * @param {CacheGroup} cacheGroup the current cache group
 * @param {number} cacheGroupIndex the index of the cache group of ordering
 * @param {Chunk[]} selectedChunks chunks selected for this module
 * @param {bigint | Chunk} selectedChunksKey a key of selectedChunks
 * @param {Module} module the current module
 * @returns {void}
 */
const addModuleToChunksInfoMap = (
  cacheGroup,
  cacheGroupIndex,
  selectedChunks,
  selectedChunksKey,
  module
) => {
  // Bail out early when fewer chunks than minChunks contain the module.
  if (selectedChunks.length < cacheGroup.minChunks) return;
  const name = cacheGroup.getName(
    module,
    selectedChunks,
    cacheGroup.key
  );
  // Check if the name is ok
  const existingChunk = compilation.namedChunks.get(name);
  if (existingChunk) {
    const parentValidationKey = `${name}|${
      typeof selectedChunksKey === "bigint"
        ? selectedChunksKey
        : selectedChunksKey.debugId
    }`;
    const valid = alreadyValidatedParents.get(parentValidationKey);
    if (valid === false) return;
    if (valid === undefined) {
      // Module can only be moved into the existing chunk if the existing chunk
      // is a parent of all selected chunks
      let isInAllParents = true;
      /** @type {Set<ChunkGroup>} */
      // Seed the work queue with every chunk group of the selected chunks.
      const queue = new Set();
      for (const chunk of selectedChunks) {
        for (const group of chunk.groupsIterable) {
          queue.add(group);
        }
      }
      for (const group of queue) {
        // Walk upwards through parent groups: when a group does not
        // contain the existing chunk, its parents are enqueued; a group
        // with no parents means the existing chunk is not an ancestor
        // everywhere, so the move is invalid.
        if (existingChunk.isInGroup(group)) continue;
        let hasParent = false;
        for (const parent of group.parentsIterable) {
          hasParent = true;
          queue.add(parent);
        }
        if (!hasParent) {
          isInAllParents = false;
        }
      }
      const valid = isInAllParents;
      alreadyValidatedParents.set(parentValidationKey, valid);
      if (!valid) {
        if (!alreadyReportedErrors.has(name)) {
          alreadyReportedErrors.add(name);
          compilation.errors.push(
            new WebpackError(
              "SplitChunksPlugin\n" +
                `Cache group "${cacheGroup.key}" conflicts with existing chunk.\n` +
                `Both have the same name "${name}" and existing chunk is not a parent of the selected modules.\n` +
                "Use a different name for the cache group or make sure that the existing chunk is a parent (e. g. via dependOn).\n" +
                'HINT: You can omit "name" to automatically create a name.\n' +
                "BREAKING CHANGE: webpack < 5 used to allow to use an entrypoint as splitChunk. " +
                "This is no longer allowed when the entrypoint is not a parent of the selected modules.\n" +
                "Remove this entrypoint and add modules to cache group's 'test' instead. " +
                "If you need modules to be evaluated on startup, add them to the existing entrypoints (make them arrays). " +
                "See migration guide of more info."
            )
          );
        }
        return;
      }
    }
  }
  // Use the configured name when present; otherwise derive a key from
  // the cache group key plus the selected-chunks key.
  const key =
    cacheGroup.key +
    (name
      ? ` name:${name}`
      : ` chunks:${keyToString(selectedChunksKey)}`);
  // Create the chunksInfoMap entry lazily on first use of this key.
  let info = chunksInfoMap.get(key);
  if (info === undefined) {
    chunksInfoMap.set(
      key,
      (info = {
        modules: new SortableSet(
          undefined,
          compareModulesByIdentifier
        ),
        cacheGroup,
        cacheGroupIndex,
        name,
        sizes: {},
        chunks: new Set(),
        reuseableChunks: new Set(),
        chunksKeys: new Set()
      })
    );
  }
  // Only accumulate per-type sizes when the module was not already
  // recorded (set size changed => genuinely new module).
  const oldSize = info.modules.size;
  info.modules.add(module);
  if (info.modules.size !== oldSize) {
    for (const type of module.getSourceTypes()) {
      info.sizes[type] = (info.sizes[type] || 0) + module.size(type);
    }
  }
  // Likewise, only merge the selected chunks into the entry when this
  // chunk-set key is new for it.
  const oldChunksKeysSize = info.chunksKeys.size;
  info.chunksKeys.add(selectedChunksKey);
  if (oldChunksKeysSize !== info.chunksKeys.size) {
    for (const chunk of selectedChunks) {
      info.chunks.add(chunk);
    }
  }
};
分组阶段
这一阶段的工作是处理我们配置的cacheGroups,分组阶段的重点是cacheGroup.chunksFilter过滤缓存组和chunksInfoMap的更新,核心代码如下
// Walk every module and record, per cache group, which chunk combinations
// it could be extracted into (all bookkeeping funnels into
// addModuleToChunksInfoMap).
for (const module of compilation.modules) {
  // Resolve the cache groups applying to this module; skip the module
  // when the normalized result is not a non-empty array.
  // getCacheGroups (built in the constructor) calls a user-supplied
  // function directly, or normalizes an object config into an array.
  let cacheGroups = this.options.getCacheGroups(module, context);
  if (!Array.isArray(cacheGroups) || cacheGroups.length === 0) {
    continue;
  }
  // Lazily compute the chunk combinations containing this module;
  // chunks sharing the same module share a key, and getCombinations
  // memoizes its result per key in combinationsCache.
  const getCombs = memoize(() => {
    const chunks = chunkGraph.getModuleChunksIterable(module);
    const chunksKey = getKey(chunks);
    return getCombinations(chunksKey);
  });
  // Iterate the cache groups in order; the index is kept for
  // deterministic ordering between groups later on.
  let cacheGroupIndex = 0;
  for (const cacheGroupSource of cacheGroups) {
    const cacheGroup = this._getCacheGroup(cacheGroupSource);
    const combs = cacheGroup.usedExports
      ? getCombsByUsedExports()
      : getCombs();
    // For all combination of chunk selection
    for (const chunkCombination of combs) {
      // Break if minimum number of chunks is not reached
      const count =
        chunkCombination instanceof Chunk ? 1 : chunkCombination.size;
      if (count < cacheGroup.minChunks) continue;
      // Select chunks by configuration
      const { chunks: selectedChunks, key: selectedChunksKey } =
        // cacheGroup.chunksFilter implements the `chunks` option
        // ("initial" / "async" / "all" or a custom function) — see
        // normalizeChunksFilter defined below.
        getSelectedChunks(chunkCombination, cacheGroup.chunksFilter);
      // Record the (group, selection, module) triple in chunksInfoMap.
      addModuleToChunksInfoMap(
        cacheGroup,
        cacheGroupIndex,
        selectedChunks,
        selectedChunksKey,
        module
      );
    }
    cacheGroupIndex++;
  }
}
//对应上面的cacheGroup.chunksFilter,会将chunks的初始条件过滤
/**
 * Map the `chunks` option to a chunk filter predicate.
 * The strings "initial" / "async" / "all" select the corresponding
 * predefined filter, a function is used as-is, and any other value
 * yields undefined (no filter).
 * @param {"initial" | "async" | "all" | Function} chunks the option value
 * @returns {Function | undefined} the chunk filter to apply
 */
const normalizeChunksFilter = chunks => {
  switch (chunks) {
    case "initial":
      return INITIAL_CHUNK_FILTER;
    case "async":
      return ASYNC_CHUNK_FILTER;
    case "all":
      return ALL_CHUNK_FILTER;
    default:
      break;
  }
  if (typeof chunks === "function") {
    return chunks;
  }
};
分组阶段总结下来就是:这个阶段会整理出cacheGroups,并基于chunks类型做一个初步处理,后续类似maxInitialRequests的条件会在下一个阶段(检查阶段)进行处理
检查阶段
在最终的检查阶段,主要工作是根据配置的对应规则进行处理,处理逻辑是一个很大的while,这里列举几个,感兴趣的同学可以到源码中看全部的检查规则
while (chunksInfoMap.size > 0) {//整个阶段的大循环
//寻找最匹配的cacheGroup,优先分割,产出打包结果
let bestEntryKey;
let bestEntry;
for (const pair of chunksInfoMap) {
const key = pair[0];
const info = pair[1];
if (
bestEntry === undefined ||
// 比较谁更需要有限分割
compareEntries(bestEntry, info) < 0
) {
bestEntry = info;
bestEntryKey = key;
}
}
const item = bestEntry;
chunksInfoMap.delete(bestEntryKey);
//通过缓存组生成新的chunk
let chunkName = item.name;
/** @type {Chunk} */
let newChunk;
// 如果chunk没有name,会判断是否可以复用一个chunk
let isExistingChunk = false;
let isReusedWithAllModules = false;
if (chunkName) {
const chunkByName = compilation.namedChunks.get(chunkName);
if (chunkByName !== undefined) {
newChunk = chunkByName;
const oldSize = item.chunks.size;
item.chunks.delete(newChunk);
isExistingChunk = item.chunks.size !== oldSize;
}
} else if (item.cacheGroup.reuseExistingChunk) {
outer: for (const chunk of item.chunks) {
if (
chunkGraph.getNumberOfChunkModules(chunk) !==
item.modules.size
) {
continue;
}
if (
item.chunks.size > 1 &&
chunkGraph.getNumberOfEntryModules(chunk) > 0
) {
continue;
}
for (const module of item.modules) {
if (!chunkGraph.isModuleInChunk(module, chunk)) {
continue outer;
}
}
if (!newChunk || !newChunk.name) {
newChunk = chunk;
} else if (
chunk.name &&
chunk.name.length < newChunk.name.length
) {
newChunk = chunk;
} else if (
chunk.name &&
chunk.name.length === newChunk.name.length &&
chunk.name < newChunk.name
) {
newChunk = chunk;
}
}
if (newChunk) {
item.chunks.delete(newChunk);
chunkName = undefined;
isExistingChunk = true;
isReusedWithAllModules = true;
}
}
//如果剩下的chunk大小依然不符合标准,则尝试进一步分割
if (usedChunks.size < item.chunks.size) {
if (isExistingChunk) usedChunks.add(newChunk);
if (usedChunks.size >= item.cacheGroup.minChunks) {
const chunksArr = Array.from(usedChunks);
for (const module of item.modules) {
addModuleToChunksInfoMap(
item.cacheGroup,
item.cacheGroupIndex,
chunksArr,
getKey(usedChunks),
module
);
}
}
continue;
}
以上就是列举的某一些配置项对应的规则的处理逻辑,在检查阶段的最后会生成results,也就是我们能看到的chunks,至此splitChunks就结束了
总结
纵观splitChunks的全部过程,可以看到,其实准备阶段的工作量是非常大的,进而到分组,再到最后的检查,将代码包一步步拆分,最终生成chunks。
以上就是本文的所有内容,如有错误,欢迎指正,感谢阅读。
暂无评论内容