GraphQL：优化数据库查询

在使用数据库时，存在一个称为“SELECT N + 1”的问题：应用程序没有用一次查询从多个相关的表（集合）中取出全部所需数据，而是针对第一次查询结果的每一行再执行一次附加子查询来获取关联数据。例如，我们先取得一份大学生名单，其中每个学生只记录了其专业的标识符；然后对每个学生，再到专业表（或专业集合）中执行一次附加子查询，以便通过专业标识符取得专业名称。由于每个子查询又可能需要进一步的子查询，对数据库的查询数量会开始呈指数增长。

使用 graphql 时，如果在解析器（resolver）函数中对关联表执行子查询，就非常容易产生“SELECT N + 1”问题。最先想到的办法是在一次请求中连同所有关联数据一并取出，但必须承认，如果客户端并未请求关联数据，这样做并不合理。

本文将考虑为graphql解决“ SELECT N +1”问题的一种解决方案。

例如，采用两个集合：“作者”（Author）和“书”（Book）。可以想见，二者是多对多的关系：一位作者可以拥有多本书，而一本书可以由多位作者撰写。为了存储信息，我们将使用 mongodb 数据库和 mongoose.js 库。

我们使用辅助BookAuthor集合和虚拟字段来实现多对多集合之间的关系。

// Author.js
// Mongoose schema for an author document.
const { Schema } = require('mongoose');

const authorSchema = new Schema({
  name: String,
});

// Virtual field `books`: the BookAuthor link documents whose `author`
// field equals this author's `_id`.
authorSchema.virtual('books', {
  ref: 'BookAuthor',
  localField: '_id',
  foreignField: 'author',
});

module.exports = authorSchema;

 // Book.js
 // Mongoose schema for a book document.
 const { Schema } = require('mongoose');

 const bookSchema = new Schema({
   title: String,
 });

 // Virtual field `authors`: the BookAuthor link documents whose `book`
 // field equals this book's `_id`.
 bookSchema.virtual('authors', {
   ref: 'BookAuthor',
   localField: '_id',
   foreignField: 'book',
 });

 module.exports = bookSchema;

 // BookAuthor.js
 // Mongoose schema for the link collection that implements the
 // many-to-many relation: each document references one author and one book.
 const { Schema } = require('mongoose');

 const { ObjectId } = Schema.Types;

 const bookAuthorSchema = new Schema({
   author: { type: ObjectId, ref: 'Author' },
   book: { type: ObjectId, ref: 'Book' },
 });

 module.exports = bookAuthorSchema;

 // mongoSchema.js
 // Opens the MongoDB connection, turns on mongoose query logging and
 // exports one compiled model per schema.
 const mongoose = require('mongoose');
 const authorSchema = require('./Author');
 const bookSchema = require('./Book');
 const bookAuthorSchema = require('./BookAuthor');

 mongoose.connect('mongodb://localhost:27017/books');
 // Debug mode prints every issued query, which is how the number of
 // database calls per GraphQL request is observed.
 mongoose.set('debug', true);

 // Models are registered in the same order as before: Author, Book, BookAuthor.
 Object.assign(exports, {
   Author: mongoose.model('Author', authorSchema),
   Book: mongoose.model('Book', bookSchema),
   BookAuthor: mongoose.model('BookAuthor', bookAuthorSchema),
 });

现在在 graphql 中定义类型 Author 和 Book。这两个类型相互引用，这带来了一个小问题。因此，为了让它们能够互相访问，我们把类型挂到模块导出对象（exports）的属性上，而不是给 module.exports 赋一个新对象（那样会替换掉原始对象）；同时把 fields 字段实现为函数，这样就可以把对另一个类型的读取“推迟”到所有循环引用都可用之后：

 // graphqlType.js
 // Registry of GraphQL types. Each type is attached to the existing exports
 // object (module.exports is never reassigned), so a type module that
 // requires this registry while it is still loading keeps a reference to
 // the same object and sees the other types once they are attached.
 const types = module.exports;

 types.Author = require('./Author');
 types.Book = require('./Book');

 // Author.js const graphql = require('graphql') const graphqlType = require('./index') module.exports = new graphql.GraphQLObjectType({ name: 'author', description: '', fields: () => ({ _id: {type: graphql.GraphQLString}, name: { type: graphql.GraphQLString, }, books: { type: new graphql.GraphQLList(graphqlType.Book), resolve: obj => obj.books && obj.books.map(book => book.book) } }) });

 // Book.js const graphql = require('graphql') const graphqlType = require('./index') module.exports = new graphql.GraphQLObjectType({ name: 'book', description: '', fields: () => ({ _id: {type: graphql.GraphQLString}, title: { type: graphql.GraphQLString, }, authors: { type: new graphql.GraphQLList(graphqlType.Author), resolve: obj => obj.authors && obj.authors.map(author => author.author) } }) });

现在，我们定义作者的请求，可能使用他们的书籍清单，也可能使用这些书籍的作者（共同作者）清单。

 const graphql = require('graphql'); const getFieldNames = require('graphql-list-fields'); const graphqlType = require('../graphqlType'); const mongoSchema = require('../mongoSchema'); module.exports = { type: new graphql.GraphQLList(graphqlType.Author), args: { _id: { type: graphql.GraphQLString } }, resolve: (_, {_id}, context, info) => { const fields = getFieldNames(info); const where = _id ? {_id} : {}; const authors = mongoSchema.Author.find(where) if (fields.indexOf('books.authors.name') > -1 ) { authors.populate({ path: 'books', populate: { path: 'book', populate: {path: 'authors', populate: {path: 'author'}} } }) } else if (fields.indexOf('books.title') > -1 ) { authors.populate({path: 'books', populate: {path: 'book'}}) } return authors.exec(); } };

为了确定客户端请求了哪些字段，这里使用了 graphql-list-fields 库。如果请求中带有嵌套对象，则会调用 mongoose 库的 populate() 方法。

现在让我们尝试一下查询。我们的实现所支持的最大查询：

 { author { _id name books { _id title authors { _id name } } } }

将通过对数据库的5次调用来执行：

 authors.find({}, { fields: {} })
 bookauthors.find({ author: { '$in': [ ObjectId("5b0fcab305b15d38f672357d"), ObjectId("5b0fcabd05b15d38f672357e"), ObjectId("5b0fcac405b15d38f672357f"), ObjectId("5b0fcad705b15d38f6723580"), ObjectId("5b0fcae305b15d38f6723581"), ObjectId("5b0fedb94ad5435896079cf1"), ObjectId("5b0fedbd4ad5435896079cf2") ] } }, { fields: {} })
 books.find({ _id: { '$in': [ ObjectId("5b0fcb7105b15d38f6723582") ] } }, { fields: {} })
 bookauthors.find({ book: { '$in': [ ObjectId("5b0fcb7105b15d38f6723582") ] } }, { fields: {} })
 authors.find({ _id: { '$in': [ ObjectId("5b0fcab305b15d38f672357d"), ObjectId("5b0fcad705b15d38f6723580") ] } }, { fields: {} })

如您所见，mongoose.js 的 populate() 函数并不使用 mongodb 相对较新的 $lookup 功能，而是发出额外的查询。但这并不是“SELECT N + 1”问题，因为它不是为每一行发出一个新查询，而是为整个集合发出一个新查询。（想要弄清 mongoose.js 的 populate() 函数实际上是如何工作的——发出一个还是多个查询——正是本示例选择非关系数据库的动机之一。）

如果我们使用简约查询：

 { author { _id name } }

那么它将仅形成对数据库的一个调用：

  authors.find({}, { fields: {} })

这正是我想要的结果。最后补充一点：刚开始寻找该问题的解决方案时，我发现了一些非常方便且功能完善的库。例如，我非常喜欢其中一个库，它能根据关系数据库的结构自动生成带有全部必要操作的 graphql 模式。不过，只有当 graphql 仅在应用程序后端内部使用时，这种方法才可以接受。如果要向应用程序的前端开放对此类服务的访问（而这正是我所需要的），那就相当于把数据库服务器的管理面板公开出来：所有表都开箱即用、一览无余。

为了方便读者，该工作示例位于存储库中。

补充：针对 joniks 的评论

用户 joniks 在评论中提到了库 https://github.com/facebook/dataloader 。让我们看看该库如何帮助解决“SELECT N + 1”问题。

给定该库，graphql Authors类型定义将如下所示：

 // Autors.js const graphql = require('graphql') const DataLoader = require('dataloader') const graphqlType = require('./index') const mongoSchema = require('../mongoSchema'); const bookLoader = new DataLoader(async ids => { const data = await mongoSchema.Book.find({ _id: { $in: ids }}).populate('authors').exec(); const books = data.reduce((obj, item) => (obj[item._id] = item) && obj, {}) const response = ids.map(id => books[id]); return response; }); module.exports = new graphql.GraphQLObjectType({ name: 'authors', description: '', fields: () => ({ _id: {type: graphql.GraphQLString}, name: { type: graphql.GraphQLString, }, books: { type: new graphql.GraphQLList(graphqlType.Books), resolve: obj => obj.books && obj.books.map(book => bookLoader.load(book.book)) } }) });

使用该库的意义在于：单个的 bookLoader.load(id) 请求会被累积起来，然后以标识符数组的形式一并交给批处理函数 const bookLoader = new DataLoader(async ids => {...}) 处理。
批处理函数必须返回一个与输入数组 ids 顺序相同的结果数组（或解析为该数组的 Promise）。

现在，我们可以重写对作者的请求，如下所示：

 // authors.js const graphql = require('graphql'); const getFieldNames = require('graphql-list-fields'); const graphqlType = require('../graphqlType'); const mongoSchema = require('../mongoSchema'); module.exports = { type: new graphql.GraphQLList(graphqlType.Authors), args: { _id: { type: graphql.GraphQLString } }, resolve: (_, {_id}, context, info) => { const fields = getFieldNames(info); const where = _id ? {_id} : {}; const authors = mongoSchema.Author.find(where).populate('books') return authors.exec(); } };

结果，我们可以查询任意嵌套层级的关联对象，而不必担心 SELECT N + 1 问题（不过代价是无条件调用 populate()，即使在并不需要它的情况下也是如此）：

 { authors { _id name books { _id title authors { _id name books { _id title authors { _id name } } } } } }

但是这里需要清楚地认识到：如果换成 SQL 服务器，那么在对象嵌套的每一层都会产生一条聚合查询。而有时要求整个操作仍然只用一条查询完成，这一点无法直接依靠 dataloader 库实现。修改后的示例位于存储库中与 dataloader 对应的分支。

apapacy@gmail.com
2018年5月31日

graphql-优化数据库查询

More articles: