Saturday, November 29, 2008

Linq is Cool, but…

Linq is great; you have got to love its ability to query object structures. The examples I’ll show you below only scratch the surface of what Linq can do. As you will see, it is capable of some really cool things, but it comes at a price…

But first here is the basic object structure I use in the example:

Multiple classes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
using System.Collections.Generic;

namespace LinqIsCool
{
    /// <summary>
    /// In-memory sample data for the examples: authors keyed by a short name,
    /// posts keyed by a post id, and the blogs that list those posts.
    /// </summary>
    public class DataStructure
    {
        public readonly Dictionary<string, Author> Authors;
        public readonly List<Blog> Blogs;
        public readonly Dictionary<string, Post> Posts;

        /// <summary>
        /// Builds the sample data. Authors come first because posts and blogs refer
        /// to them; posts come second because blogs refer to posts.
        /// </summary>
        public DataStructure()
        {
            Authors = CreateAuthors();
            Posts = CreatePosts(Authors);
            Blogs = CreateBlogs(Authors, Posts);
        }

        /// <summary>Creates the four sample authors, keyed by short name.</summary>
        private static Dictionary<string, Author> CreateAuthors()
        {
            var authors = new Dictionary<string, Author>();
            authors.Add("mark", new Author("Mark Nijhof"));
            authors.Add("mona", new Author("Mona Nijhof"));
            authors.Add("milo", new Author("Milo Nijhof"));
            authors.Add("thalia", new Author("Thalia Nijhof"));
            return authors;
        }

        /// <summary>Creates the ten sample posts, keyed "post0" through "post9".</summary>
        private static Dictionary<string, Post> CreatePosts(IDictionary<string, Author> authors)
        {
            var mark = authors["mark"];
            var mona = authors["mona"];
            var milo = authors["milo"];

            var posts = new Dictionary<string, Post>();
            posts.Add("post0", new Post("Tech talk 1", mark));
            posts.Add("post1", new Post("Tech talk 2", mark));
            posts.Add("post2", new Post("Tech talk 3", mark));
            posts.Add("post3", new Post("Family talk 1", mona));
            posts.Add("post4", new Post("Tech talk 4", mark));
            posts.Add("post5", new Post("Family talk 2", mona));
            posts.Add("post6", new Post("Tech talk 5", mark));
            posts.Add("post7", new Post("Tech talk 6", mark));
            posts.Add("post8", new Post("Family talk 2", mark));
            posts.Add("post9", new Post("Family talk 3", milo));
            return posts;
        }

        /// <summary>
        /// Creates the three sample blogs. "post8" and "post9" each appear on two
        /// blogs, and on at least one blog that their author does not own.
        /// </summary>
        private static List<Blog> CreateBlogs(
            IDictionary<string, Author> authors,
            IDictionary<string, Post> posts)
        {
            var techBlog = new Blog("Blog.Fohjin.com", authors["mark"]);
            techBlog.Posts = SelectPosts(posts, "post0", "post1", "post2", "post4", "post6", "post7", "post8");

            var kidsBlog = new Blog("KidsTalk.Nijhof.com", authors["mark"]);
            kidsBlog.Posts = SelectPosts(posts, "post9");

            var familyBlog = new Blog("Family.Nijhof.com", authors["mona"]);
            familyBlog.Posts = SelectPosts(posts, "post3", "post5", "post8", "post9");

            var blogs = new List<Blog>();
            blogs.Add(techBlog);
            blogs.Add(kidsBlog);
            blogs.Add(familyBlog);
            return blogs;
        }

        /// <summary>Looks up the given post ids and returns the posts in the same order.</summary>
        private static List<Post> SelectPosts(IDictionary<string, Post> posts, params string[] postIds)
        {
            var selected = new List<Post>(postIds.Length);
            foreach (var postId in postIds)
            {
                selected.Add(posts[postId]);
            }
            return selected;
        }
    }

    /// <summary>
    /// An author, identified in the examples only by a display name.
    /// </summary>
    public class Author
    {
        /// <summary>Creates an author with the given display name.</summary>
        /// <param name="name">Value stored unchanged in <see cref="AuthorName"/>.</param>
        public Author(string name)
        {
            this.AuthorName = name;
        }

        /// <summary>Display name of the author.</summary>
        public string AuthorName { get; set; }
    }

    /// <summary>
    /// A blog with a name, an owning author and the list of posts published on it.
    /// </summary>
    public class Blog
    {
        /// <summary>Name of the blog.</summary>
        public string BlogName { get; set; }

        /// <summary>Author that owns the blog.</summary>
        public Author Owner { get; set; }

        /// <summary>
        /// Posts published on this blog. Starts out as an empty list; callers may
        /// replace it, for example through an object initializer.
        /// </summary>
        public IList<Post> Posts { get; set; }

        /// <summary>Creates a blog without any posts.</summary>
        /// <param name="name">Value stored in <see cref="BlogName"/>.</param>
        /// <param name="owner">Value stored in <see cref="Owner"/>.</param>
        public Blog(string name, Author owner)
        {
            BlogName = name;
            Owner = owner;
            // Default to an empty list so that code such as blog.Posts.Contains(...)
            // does not hit a null reference when no posts were assigned.
            Posts = new List<Post>();
        }
    }

    /// <summary>
    /// A single post: a title plus the author who wrote it.
    /// </summary>
    public class Post
    {
        /// <summary>Creates a post with the given title and author.</summary>
        /// <param name="title">Value stored unchanged in <see cref="Title"/>.</param>
        /// <param name="author">Value stored unchanged in <see cref="Author"/>.</param>
        public Post(string title, Author author)
        {
            this.Title = title;
            this.Author = author;
        }

        /// <summary>Title of the post.</summary>
        public string Title { get; set; }

        /// <summary>Author who wrote the post.</summary>
        public Author Author { get; set; }
    }
}


Wow, that was long. The code below shows several examples of using Linq; look at the comments for an explanation of each query:

class LinqExamples
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
using System;
using System.Collections.Generic;
using System.Linq;

namespace LinqIsCool
{
    /// <summary>
    /// Example queries over <see cref="DataStructure"/> written in Linq query syntax.
    /// Every method prints its result to the console, one line per item.
    /// </summary>
    public class LinqExamples
    {
        private readonly DataStructure dataStructure;

        public LinqExamples()
        {
            dataStructure = new DataStructure();
        }

        /// <summary>
        /// Prints the Authors that do not own a Blog.
        /// How does it work:
        /// The query group-joins every Author with the Blogs whose Owner equals that
        /// Author and places the matches in ownedBlogs. DefaultIfEmpty supplies a
        /// default element for an empty sequence, so an Author without any Blog
        /// yields a single null. Keeping only the rows where ownedBlog is null
        /// therefore selects exactly the Authors that own no Blog.
        /// </summary>
        public void GetAuthorsThatDoNotOwnABlog()
        {
            var authorsWithoutBlog =
            (
                from author in dataStructure.Authors.Values
                join blog in dataStructure.Blogs on author equals blog.Owner into ownedBlogs
                    from ownedBlog in ownedBlogs.DefaultIfEmpty()
                    where ownedBlog == null
                select author
            ).ToList();

            authorsWithoutBlog.ForEach(x => Console.WriteLine("- " + x.AuthorName));
        }

        /// <summary>
        /// Prints the number of Blogs each Author owns.
        /// How does it work:
        /// For every Author a sub Linq query selects the Blogs whose Owner is that
        /// Author, and Count() returns how many there are. The sub query is counted
        /// directly; there is no need to copy it into a list first.
        /// </summary>
        public void GetNumberOfBlogsPerAuthor()
        {
            var blogCounts =
            (
                from author in dataStructure.Authors.Values
                select new
                {
                    Author = author,
                    BlogCount =
                    (
                        from blog in dataStructure.Blogs
                        where blog.Owner == author
                        select blog
                    ).Count()
                }
            ).ToList();

            blogCounts.ForEach(x => Console.WriteLine("- " + x.Author.AuthorName + " owns " + x.BlogCount + " blogs"));
        }

        /// <summary>
        /// Prints the actual Blogs each Author owns.
        /// How does it work:
        /// For every Author a sub Linq query selects the Blogs whose Owner is that
        /// Author and returns them as a List, which is stored next to the Author.
        /// </summary>
        public void GetBlogsPerAuthor()
        {
            var blogsPerAuthor =
            (
                from author in dataStructure.Authors.Values
                select new
                {
                    Author = author,
                    Blogs =
                    (
                        from blog in dataStructure.Blogs
                        where blog.Owner == author
                        select blog
                    ).ToList()
                }
            ).ToList();

            blogsPerAuthor.ForEach(x =>
            {
                Console.Write("- " + x.Author.AuthorName + " owns ");
                x.Blogs.ForEach(y => Console.Write(y.BlogName + " "));
                Console.WriteLine();
            });
        }

        /// <summary>
        /// Prints the requested Posts that are published on one specific Blog.
        /// How does it work:
        /// The two from clauses combine every entry of dataStructure.Posts with every
        /// Blog. The where clause then keeps a combination only when the Blog is
        /// selectedBlog, the Post is in that Blog's Posts list, and the Post id is in
        /// requestedPostIds. Finally the Post itself is selected.
        /// </summary>
        public void GetSpecificPostsFromSpecificBlog()
        {
            var requestedPostIds = new List<string> {"post0", "post1", "post9"};
            var selectedBlog = dataStructure.Blogs[0];

            // NOTE: the second from clause is a cross join that the first condition
            // narrows down to a single Blog; it is kept to illustrate multiple
            // from clauses in one query.
            var selectedPosts =
            (
                from entry in dataStructure.Posts
                from blog in dataStructure.Blogs
                where
                    blog == selectedBlog
                    && blog.Posts.Contains(entry.Value)
                    && requestedPostIds.Contains(entry.Key)
                select entry.Value
            ).ToList();

            selectedPosts.ForEach(x => Console.WriteLine("- " + x.Title + " by " + x.Author.AuthorName));
        }

        /// <summary>
        /// Prints the Posts written by a specific Author that are published on a
        /// Blog the Author does not own.
        /// How does it work:
        /// The two from clauses combine every entry of dataStructure.Posts with every
        /// Blog. The where clause keeps a combination only when the Post was written
        /// by requestedAuthor, the Blog is not owned by requestedAuthor, and the Post
        /// is in the Posts list of that Blog. Then it selects the Post and the Blog.
        /// </summary>
        public void GetPostsFromSpecificAuthorWhereNotBlogOwner()
        {
            var requestedAuthor = dataStructure.Authors["mark"];

            var guestPosts =
            (
                from entry in dataStructure.Posts
                from blog in dataStructure.Blogs
                where
                    entry.Value.Author == requestedAuthor
                    && blog.Owner != requestedAuthor
                    && blog.Posts.Contains(entry.Value)
                select new
                {
                    Post = entry.Value,
                    Blog = blog
                }
            ).ToList();

            guestPosts.ForEach(x =>
                Console.WriteLine("- " +
                    x.Post.Title + " by " +
                    x.Post.Author.AuthorName + " on " +
                    x.Blog.BlogName));
        }
    }
}


Now we finally get to the downside of Linq queries: you very quickly run the risk of your code becoming unreadable. So when you do write complex Linq queries, I suggest you also write proper comments, use understandable variable names, and perhaps split the query into multiple smaller Linq queries. Below is the output of running the methods:

GetAuthorsThatDoNotOwnABlog()
- Milo Nijhof
- Thalia Nijhof

GetNumberOfBlogsPerAuthor()
- Mark Nijhof owns 2 blogs
- Mona Nijhof owns 1 blogs
- Milo Nijhof owns 0 blogs
- Thalia Nijhof owns 0 blogs

GetBlogsPerAuthor()
- Mark Nijhof owns Blog.Fohjin.com KidsTalk.Nijhof.com
- Mona Nijhof owns Family.Nijhof.com
- Milo Nijhof owns
- Thalia Nijhof owns

GetSpecificPostsFromSpecificBlog()
- Tech talk 1 by Mark Nijhof
- Tech talk 2 by Mark Nijhof

GetPostsFromSpecificAuthorWhereNotBlogOwner()
- Family talk 2 by Mark Nijhof on Family.Nijhof.com


Just for fun I “Alt + Enter”-ed a couple of times, did some manual formatting, and voilà: you get the same results, but now using lambda expressions. Remember I said something about readability?

class LambdaExamples
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
using System;
using System.Collections.Generic;
using System.Linq;

namespace LinqIsCool
{
    /// <summary>
    /// The same example queries over <see cref="DataStructure"/> as the query-syntax
    /// versions, written with Linq extension methods and lambda expressions.
    /// Every method prints its result to the console, one line per item.
    /// </summary>
    public class LambdaExamples
    {
        private readonly DataStructure dataStructure;

        public LambdaExamples()
        {
            dataStructure = new DataStructure();
        }

        /// <summary>
        /// Prints the Authors that do not own a Blog.
        /// How does it work:
        /// GroupJoin pairs every Author with the Blogs whose Owner equals that
        /// Author. SelectMany flattens those pairs; DefaultIfEmpty supplies a
        /// default element for an empty sequence, so an Author without any Blog
        /// yields a single null. Where keeps only the rows with a null Blog, and
        /// Select returns the Author of each remaining row.
        /// </summary>
        public void GetAuthorsThatDoNotOwnABlog()
        {
            var authorsWithoutBlog =
            (
                dataStructure.Authors.Values.GroupJoin(
                    dataStructure.Blogs,
                    author => author,
                    blog => blog.Owner,
                    (author, ownedBlogs) => new {Author = author, OwnedBlogs = ownedBlogs}
                ).SelectMany(
                    group => group.OwnedBlogs.DefaultIfEmpty(),
                    (group, ownedBlog) => new {group.Author, OwnedBlog = ownedBlog}
                ).Where(
                    row => row.OwnedBlog == null
                ).Select(
                    row => row.Author
                )
            ).ToList();

            authorsWithoutBlog.ForEach(x => Console.WriteLine("- " + x.AuthorName));
        }

        /// <summary>
        /// Prints the number of Blogs each Author owns.
        /// How does it work:
        /// For every Author, Count is called on dataStructure.Blogs with a predicate
        /// that matches the Blogs whose Owner is that Author. Counting with the
        /// predicate avoids building an intermediate list.
        /// </summary>
        public void GetNumberOfBlogsPerAuthor()
        {
            var blogCounts =
            (
                dataStructure.Authors.Values.Select(
                    author => new
                    {
                        Author = author,
                        BlogCount = dataStructure.Blogs.Count(
                            blog => blog.Owner == author
                        )
                    }
                )
            ).ToList();

            blogCounts.ForEach(x => Console.WriteLine("- " + x.Author.AuthorName + " owns " + x.BlogCount + " blogs"));
        }

        /// <summary>
        /// Prints the actual Blogs each Author owns.
        /// How does it work:
        /// For every Author, Where filters dataStructure.Blogs down to the Blogs
        /// whose Owner is that Author, and the result is stored as a List next to
        /// the Author.
        /// </summary>
        public void GetBlogsPerAuthor()
        {
            var blogsPerAuthor =
            (
                dataStructure.Authors.Values.Select(
                    author => new
                    {
                        Author = author,
                        Blogs = dataStructure.Blogs.Where(
                            blog => blog.Owner == author
                        ).ToList()
                    }
                )
            ).ToList();

            blogsPerAuthor.ForEach(x =>
            {
                Console.Write("- " + x.Author.AuthorName + " owns ");
                x.Blogs.ForEach(y => Console.Write(y.BlogName + " "));
                Console.WriteLine();
            });
        }

        /// <summary>
        /// Prints the requested Posts that are published on one specific Blog.
        /// How does it work:
        /// SelectMany combines every entry of dataStructure.Posts with every Blog.
        /// Where then keeps a combination only when the Blog is selectedBlog, the
        /// Post is in that Blog's Posts list, and the Post id is in
        /// requestedPostIds. Select returns the Post itself.
        /// </summary>
        public void GetSpecificPostsFromSpecificBlog()
        {
            var requestedPostIds = new List<string> {"post0", "post1", "post9"};
            var selectedBlog = dataStructure.Blogs[0];

            // NOTE: SelectMany builds a cross join that the first condition narrows
            // down to a single Blog; it is kept so this mirrors the query-syntax
            // version with its two from clauses.
            var selectedPosts =
            (
                dataStructure.Posts.SelectMany(
                    entry => dataStructure.Blogs,
                    (entry, blog) => new {Entry = entry, Blog = blog}
                ).Where(
                    pair => pair.Blog == selectedBlog
                            && pair.Blog.Posts.Contains(pair.Entry.Value)
                            && requestedPostIds.Contains(pair.Entry.Key)
                ).Select(
                    pair => pair.Entry.Value
                )
            ).ToList();

            selectedPosts.ForEach(x => Console.WriteLine("- " + x.Title + " by " + x.Author.AuthorName));
        }

        /// <summary>
        /// Prints the Posts written by a specific Author that are published on a
        /// Blog the Author does not own.
        /// How does it work:
        /// SelectMany combines every entry of dataStructure.Posts with every Blog.
        /// Where keeps a combination only when the Post was written by
        /// requestedAuthor, the Blog is not owned by requestedAuthor, and the Post
        /// is in the Posts list of that Blog. Select returns the Post and the Blog.
        /// </summary>
        public void GetPostsFromSpecificAuthorWhereNotBlogOwner()
        {
            var requestedAuthor = dataStructure.Authors["mark"];

            var guestPosts =
            (
                dataStructure.Posts.SelectMany(
                    entry => dataStructure.Blogs,
                    (entry, blog) => new {Entry = entry, Blog = blog}
                ).Where(
                    pair => pair.Entry.Value.Author == requestedAuthor
                            && pair.Blog.Owner != requestedAuthor
                            && pair.Blog.Posts.Contains(pair.Entry.Value)
                ).Select(
                    pair => new
                    {
                        Post = pair.Entry.Value,
                        pair.Blog
                    }
                )
            ).ToList();

            guestPosts.ForEach(x =>
                Console.WriteLine("- " +
                    x.Post.Title + " by " +
                    x.Post.Author.AuthorName + " on " +
                    x.Blog.BlogName));
        }
    }
}


Hope you enjoyed this.

-Mark

No comments: