Code Optimization

Interesting things about software development and code optimization

SSS - Simple Serverless Search for your website

Hello friends,


If you need a simple search of your own that looks through all your help documents, this may be a good fit for you.

I will show my own simple and fast implementation, which I'm sure you will be able to use wherever you need it: PHP, Java, Python, .NET, Ruby, Node.js, etc.


So, let's start and see how it works!


1. The first stage is indexing your help files or DB records. In my case I had HTML files, so here is my code:

        
        // Punctuation and whitespace characters that are treated as word boundaries
        // when page text is broken into keywords.
        char[] cToRemove = new char[] { '\r', '\n', '\t', ' ', ',', '/', '\\', '~', '–', '.', ':', '\'', '!', ';', '[', ']', '"', '{', '}', '=', '+', '_', ')', '(', '*', '&', '?', '%', '$', '#', '@', '`', '<', '>', '|' };
        // Separator used to split the cleaned-up text into individual words.
        char[] cToSplit = new char[] { ' ' };
        // Stop words: common words that carry no search value and are never indexed.
        // Entries are compared against lower-cased words, so they must be lower case.
        string[] toSkip = new string[] 
        {
            "i", "me", "you", "he", "she", "they", "them", "this", "that", "than", "then",  "it", "our", "their", "her", "his", "its", "it's",
            "was", "were", "is","are", "be", "being", "been", "can", "could", "should", "shall", "will", "would", "have", "has", "did", "do", "does",
            "may", "might", "must", "need", "better", "if", "else","also", "same", "now","new", "below", "above",
            // "themselves" is the standard form; the non-standard "theirselves" is kept as well.
            "itself", "ourselves", "himself", "herself", "themselves", "theirselves", "let", "get", "set", "done"
        };

        /// <summary>
        /// Starts a background crawl of the help pages and rebuilds the search index file.
        /// </summary>
        /// <remarks>
        /// Every file in ~/Views/Help (except the excluded partials) is fetched through its
        /// /Help/{file name} URL, reduced to a list of distinct keywords, and the resulting
        /// index is serialized to ~/shelp.json. The action returns as soon as the crawl has
        /// been started, so "ok" does not mean that indexing has finished.
        /// </remarks>
        /// <returns>A content result with the text "ok".</returns>
        public ActionResult RunHelpCrawler()
        {
            // Resolved before the task starts; the task body only works with the resulting strings.
            string root = System.Web.HttpContext.Current.Server.MapPath("~/Views/Help");
            string json = System.Web.HttpContext.Current.Server.MapPath("~/shelp.json");

            AsyncManager.OutstandingOperations.Increment();
            System.Threading.Tasks.Task.Factory.StartNew(() =>
            {
                try
                {
                    // Hash set gives constant-time stop-word checks for every word of every page.
                    var stopWords = new HashSet<string>(toSkip);
                    var crawledUrls = new HashSet<string>();
                    //keywords, url, title
                    List<Models.Search> search = new List<Models.Search>();

                    foreach (string filePath in System.IO.Directory.EnumerateFiles(root))
                    {
                        //exclude everything you need
                        if (filePath.Contains("LeftSideMenu") || filePath.Contains("GetHelpTips"))
                        {
                            continue;
                        }

                        try
                        {
                            string url = Extensions.Extensions.DomainName + "/Help/" + System.IO.Path.GetFileNameWithoutExtension(filePath);
                            url = url.ToLowerInvariant();

                            // Two files that differ only by extension map to the same URL; fetch it once.
                            if (!crawledUrls.Add(url))
                            {
                                continue;
                            }

                            IndexHelpPage(url, stopWords, search);
                        }
                        catch (Exception ex)
                        {
                            // One broken page must not abort the whole crawl, but it must be visible.
                            System.Diagnostics.Trace.TraceWarning("RunHelpCrawler: skipped '{0}': {1}", filePath, ex);
                        }
                    }

                    using (System.IO.StreamWriter sw = new System.IO.StreamWriter(json, false))
                    {
                        sw.Write(SimpleJson.SimpleJson.SerializeObject(search));
                    }
                }
                catch (Exception ex)
                {
                    System.Diagnostics.Trace.TraceError("RunHelpCrawler: indexing failed: {0}", ex);
                }
                finally
                {
                    // Balances the Increment() above so the counter does not stay raised forever.
                    AsyncManager.OutstandingOperations.Decrement();
                }
            });

            return Content("ok");
        }

        /// <summary>
        /// Downloads one help page, extracts its keywords and records them in the index.
        /// </summary>
        /// <param name="url">Lower-cased absolute URL of the help page.</param>
        /// <param name="stopWords">Words that must not be indexed.</param>
        /// <param name="search">Index being built; one entry per distinct keyword list.</param>
        private void IndexHelpPage(string url, HashSet<string> stopWords, List<Models.Search> search)
        {
            //we are going to parse HTML to avoid unneeded text. tags, etc
            var web = new HtmlAgilityPack.HtmlWeb();
            var doc = web.Load(url);

            // The first <h1> is the page title; fall back to the URL when the page has none.
            var heading = doc.DocumentNode.Descendants("h1").FirstOrDefault();
            string title = heading != null ? heading.InnerText : url;

            //our HTMLs have marked sections with the 'shelp' class that we will use for indexing only
            // All marked sections are indexed; a page with none simply yields no keywords.
            var textNodes = doc.DocumentNode.Descendants()
                .Where(x => x.Attributes["class"] != null
                    && !string.IsNullOrEmpty(x.Attributes["class"].Value)
                    && x.Attributes["class"].Value.Contains("shelp"))
                .SelectMany(section => section.Descendants())
                .Where(n =>
                    n.NodeType == HtmlAgilityPack.HtmlNodeType.Text &&
                    n.ParentNode.Name != "script" &&
                    n.ParentNode.Name != "style");

            var buffer = new System.Text.StringBuilder();
            foreach (var node in textNodes)
            {
                // The space keeps the last word of one element apart from the first word of the next.
                buffer.Append(node.InnerText).Append(' ');
            }

            string text = buffer.ToString();
            foreach (var oc in cToRemove)
            {
                text = text.Replace(oc, ' ');
            }
            text = text.ToLowerInvariant();

            // Distinct keywords in order of first appearance; short words and stop words are dropped.
            var seen = new HashSet<string>();
            var ordered = new List<string>();
            foreach (var word in text.Split(cToSplit, StringSplitOptions.RemoveEmptyEntries))
            {
                if (word.Length > 2 && !stopWords.Contains(word) && seen.Add(word))
                {
                    ordered.Add(word);
                }
            }

            if (ordered.Count == 0)
            {
                return;
            }

            string keywords = "," + string.Join(",", ordered) + ",";

            // Pages with an identical keyword list share one entry instead of one of them being lost.
            Models.Search item = search.FirstOrDefault(i => i.Keywords == keywords);
            if (item == null)
            {
                item = new Models.Search() { Keywords = keywords, UrlTitle = new List<Tuple<string, string>>() };
                search.Add(item);
            }
            if (!item.UrlTitle.Any(a => a.Item1 == url))
            {
                item.UrlTitle.Add(new Tuple<string, string>(url, title));
            }
        }


Whenever you add a new help document, just re-run this simple crawler and it will re-index everything.

2. The UI and JavaScript parts that let users use this simple serverless search

UI

		<!-- Help search box: the search script binds to the input with id "hlpSearch". -->
		<div class="row">
			<div class="col-md-12">
				<div class="input-group">
					<span id="shlpSearch" class="input-group-addon" style="border: 1px solid #ccc;height: 26px;padding-top: 2px;padding-bottom: 2px;">Search</span>
					<input id="hlpSearch" type="search" class="form-control" title="Search help" placeholder="How to ..."
					       style="max-width:100%;height: 26px;padding: 6px;">
				</div>
			</div>
		</div>

JS

<script>
	var shelp;
        //load our index json and avoid caching
        $.getJSON("/shelp.json?antc="+new Date().getTime(), function (data) {
            shelp = data;
        });
		
	var prevHtml = "";
        function searchHelp(sh) {
            if (sh != "" && sh.length > 2) {
                if (prevHtml == "") {
                    prevHtml = $(".shelp").html();
                }
                var items = [];
		var lessitems=[];
                var ss = sh.replace(",", " ").split(" ");
				
		$.each(shelp, function (i) {
			var br=0;
			for(var f=0;f<ss.length;f++){
				br += (shelp[i].Keywords.indexOf(ss[f]) >= 0)?1:0;
			}
			$.each(shelp[i].UrlTitle, function (ii) {
				var el = "<a class='label label-default' style='font-size:125%;line-height:2' href='" + shelp[i].UrlTitle[ii].Item1 + "'>" + shelp[i].UrlTitle[ii].Item2 + "</a>";
				//most relevant first
                                if(br==ss.length){
					if ($.inArray(el, items) === -1){
						items.unshift(el);
					}
				}
				else if(br > 0){ //less relevant but containing at least one keyword
					if ($.inArray(el, items) === -1 && $.inArray(el, lessitems) === -1){
						lessitems.push(el);
					}
				}
			});
					
		});
		if(lessitems.length>0){
                        //split less relevant by horizontal line
			items.push("<hr style='margin: 0;padding: 0;'/>");
			$.each(lessitems, function (i) {
				items.push(lessitems[i]);
			});
		}
				
		$(".shelp").css("border", "1px solid");
		$(".shelp").css("box-shadow", "0 0 8px 1px");
                $(".shelp").html(items.join("<br/>"));
            }
            else if (prevHtml != "") {
		$(".shelp").css("border", "");
		$(".shelp").css("box-shadow", "");
                $(".shelp").html(prevHtml);
                prevHtml = "";
            }
        }
		
	$(document).ready(function () {
            //attach event for searching
            $("#hlpSearch").on("keyup mouseup input search touchend", function (e) {
                searchHelp($(this).val());
            });
        });
    </script>

So, we store the previous HTML and replace it with newly generated HTML containing the search results. The most relevant items, those whose keywords match every search term, come first. A horizontal line separates them from the less relevant items, which match only some of the terms.


You can add more specific logic, for example handling plural forms or giving weight to keyword order, but as a simple and fast search this will be more than enough.


Thank you and see you


1vqHSTrq1GEoEF7QsL8dhmJfRMDVxhv2y



C#, Json and PHP Form

Today I had a task to submit data to a PHP API that takes JSON-like data via HTTP POST using the content type "application/x-www-form-urlencoded", in a format like this:

order[phone]=+48733552233&order[name]=First Name Last Name&order[deliveryCost]=50&order[deliveryStockCode]=39931b80-e1c2-11e3-8c4a-0050568002cf&order[comment]=test api&orderItems[0][itemID]=194559-0&orderItems[0][salePrice]=9500&orderItems[0][count]=2&orderItems[1][itemID]=071402-0&orderItems[1][salePrice]=750&orderItems[1][count]=5&key=777777777777777

Hah, crazy, right? I had never seen anything like this before, and I had no choice but to accept it and build the conversion myself.

Google helped me a little, but in my case there were arrays of objects, so I had to modify and extend the code I found on the internet.

So, to transform this:


                    // Delivery cost is the sum of the per-item shipping charges.
                    var shippingTotal = OrderItems.Sum(line => line.AdditionalShippingCharge);

                    // One entry per ordered product; prices are rounded to whole units.
                    var orderLines = OrderItems
                        .Select(line => new
                        {
                            itemID = line.ProdId,
                            salePrice = Math.Round(line.Price, 0, MidpointRounding.ToEven),
                            count = line.Quantity
                        })
                        .ToArray();

                    // Request payload: API key, order header and order lines.
                    var json = new
                    {
                        key = apiKey,
                        order = new
                        {
                            phone = PhoneNumber,
                            name = LastName + " " + FirstName,
                            comment = Comment,
                            deliveryCost = Math.Round(shippingTotal, 0, MidpointRounding.ToEven).ToString(),
                            deliveryStockCode = Address
                        },
                        orderItems = orderLines
                    };

into the form data shown above, here is my method (it is rough and certainly not perfect; I had no time to make it clean, and I may rewrite it in the future).


Also, in my case there were anonymous types, so the method had to identify them somehow. As you may already know, anonymous types in C# have no explicit, compile-time type name that you could pass to typeof().


        /// <summary>
        /// Converts an object graph into an "application/x-www-form-urlencoded" string
        /// using PHP-style bracket notation.
        /// </summary>
        /// <remarks>
        /// <para>Each readable, non-null property of <paramref name="request"/> becomes:</para>
        /// <para>- <c>name=value</c> for a plain value (its <c>ToString()</c> result);</para>
        /// <para>- <c>name[member]=value</c> for an anonymous-type object;</para>
        /// <para>- <c>name[0][member]=value</c>, <c>name[1][member]=value</c>, ... for a collection of anonymous-type objects;</para>
        /// <para>- <c>name=a,b,c</c> for any other collection, joined with <paramref name="separator"/>.</para>
        /// <para>Keys and values are URL-encoded. Empty objects and empty collections of
        /// anonymous objects produce no output. Values are formatted with <c>ToString()</c>,
        /// i.e. with the current culture.</para>
        /// </remarks>
        /// <param name="request">Object whose properties are serialized.</param>
        /// <param name="separator">Separator used when joining the items of a plain collection.</param>
        /// <returns>The key/value pairs joined with '&amp;'; an empty string when there are none.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
        public string JsonToHttpFormString(object request, string separator = ",")
        {
            if (request == null)
                throw new ArgumentNullException("request");

            var pairs = new List<string>();
            foreach (var property in ReadableValues(request))
            {
                AppendFormPairs(pairs, property.Key, property.Value, separator);
            }

            return string.Join("&", pairs);
        }

        // Returns name/value pairs for all readable, non-indexed properties whose value is not null.
        private static List<KeyValuePair<string, object>> ReadableValues(object source)
        {
            return source.GetType().GetProperties()
                // Indexers cannot be read without arguments, so they are skipped.
                .Where(p => p.CanRead && p.GetIndexParameters().Length == 0)
                .Select(p => new KeyValuePair<string, object>(p.Name, p.GetValue(source, null)))
                .Where(p => p.Value != null)
                .ToList();
        }

        // Anonymous types have no name usable with typeof(); they are recognised by the
        // compiler-generated type name, which contains "AnonymousType".
        private static bool IsAnonymousType(Type type)
        {
            return type.IsGenericType && type.Name.Contains("AnonymousType");
        }

        // Appends the encoded "key=value" pair(s) that represent one value to the output list.
        private static void AppendFormPairs(List<string> pairs, string key, object value, string separator)
        {
            // Nested object: one pair per member, addressed as key[member].
            if (IsAnonymousType(value.GetType()))
            {
                foreach (var member in ReadableValues(value))
                {
                    AppendFormPairs(pairs, key + "[" + member.Key + "]", member.Value, separator);
                }
                return;
            }

            // Plain value; a string is enumerable but must be treated as a single value.
            var sequence = value as IEnumerable;
            if (sequence == null || value is string)
            {
                pairs.Add(HttpUtility.UrlEncode(key) + "=" + HttpUtility.UrlEncode(value.ToString()));
                return;
            }

            var items = sequence.Cast<object>().ToList();

            // Collection of objects: each element is addressed as key[index].
            if (items.Any(item => item != null && IsAnonymousType(item.GetType())))
            {
                for (int i = 0; i < items.Count; i++)
                {
                    if (items[i] != null)
                    {
                        AppendFormPairs(pairs, key + "[" + i + "]", items[i], separator);
                    }
                }
                return;
            }

            // Collection of plain values: a single pair holding the joined items.
            pairs.Add(HttpUtility.UrlEncode(key) + "=" + HttpUtility.UrlEncode(string.Join(separator, items)));
        }


The method also URL-encodes everything so that the data is properly formatted for a form submission, so we finally get something like this:

order%5Bphone%5D=%2B+44+%28733%29+55-22-33& order%5Bname%5D=%D0%98%D0%BC%D1%8F+%D0%BF%D0%BE%D0%BB%D1%83% D1%87%D0%B0%D1%82%D0%B5%D0%BB%D1%8F& order%5BdeliveryCost%5D=50&order%5BdeliveryStockCode%5D=39931b80-e1c2-11e3-8c4a-0050568002cf& order%5Bcomment%5D=test+api&orderItems%5B0%5D%5BitemID%5D=194559-0&orderItems%5B0%5D%5BsalePrice%5D=9500&orderItems%5B0%5D%5Bcount%5D=2& orderItems%5B1%5D%5BitemID%5D=071402-0&orderItems%5B1%5D%5BsalePrice%5D=750&orderItems%5B1%5D%5Bcount%5D=5&key=777777777777777


Thank you for reading and see you :)




1vqHSTrq1GEoEF7QsL8dhmJfRMDVxhv2y