Hello friends,
If you need a simple search of your own that covers all of your help documents, this may be a good fit for you.
I will show my own simple and fast way of implementing it, which I'm sure you will be able to use wherever you need: PHP, Java, Python, .NET, Ruby, Node.js, etc.
So, let's start and see how it works!
1. The first stage is indexing your help files or DB records. In my case I had HTML files, so here is my code:
// Characters that RunHelpCrawler replaces with a plain space before splitting the page text into words.
char[] cToRemove =
{
    '\r', '\n', '\t', ' ', ',', '/', '\\', '~', '–', '.', ':', '\'', '!', ';',
    '[', ']', '"', '{', '}', '=', '+', '_', ')', '(', '*', '&', '?', '%', '$',
    '#', '@', '`', '<', '>', '|'
};

// Separator used to split the cleaned text into candidate keywords.
char[] cToSplit = { ' ' };

// Words that are never added to the index (compared against the lower-cased keyword).
string[] toSkip =
{
    "i", "me", "you", "he", "she", "they", "them", "this", "that", "than", "then", "it",
    "our", "their", "her", "his", "its", "it's",
    "was", "were", "is", "are", "be", "being", "been",
    "can", "could", "should", "shall", "will", "would",
    "have", "has", "did", "do", "does",
    "may", "might", "must", "need", "better", "if", "else", "also", "same", "now", "new",
    "below", "above",
    "itself", "ourselves", "himself", "herself", "theirselves",
    "let", "get", "set", "done"
};
/// <summary>
/// Starts a background task that rebuilds the help search index and returns immediately.
/// </summary>
/// <remarks>
/// For every file in ~/Views/Help (except those whose path contains "LeftSideMenu" or
/// "GetHelpTips") the task loads the page from "{DomainName}/Help/{file name}", collects the
/// text inside the single element whose class contains "shelp", converts it into a
/// comma-delimited keyword list (",word1,word2,") and finally serializes all entries to
/// ~/shelp.json. Pages that end up with an identical keyword list share one entry that
/// carries all of their links.
/// </remarks>
/// <returns>The plain-text content "ok"; the action does not wait for the index to be written.</returns>
public ActionResult RunHelpCrawler()
{
    // Resolve the physical paths on the request thread, before the work moves to a background task.
    string root = System.Web.HttpContext.Current.Server.MapPath("~/Views/Help");
    string json = System.Web.HttpContext.Current.Server.MapPath("~/shelp.json");

    AsyncManager.OutstandingOperations.Increment();
    System.Threading.Tasks.Task.Factory.StartNew(() =>
    {
        try
        {
            //keywords, url, title
            List<Models.Search> search = new List<Models.Search>();

            foreach (string filePath in System.IO.Directory.EnumerateFiles(root))
            {
                //exclude everything you need
                if (filePath.Contains("LeftSideMenu") || filePath.Contains("GetHelpTips"))
                {
                    continue;
                }

                try
                {
                    // NOTE(review): DomainName is read on the background thread; confirm it does not need HttpContext.Current.
                    string url = (Extensions.Extensions.DomainName + "/Help/" + System.IO.Path.GetFileNameWithoutExtension(filePath)).ToLowerInvariant();

                    //we are going to parse HTML to avoid unneeded text. tags, etc
                    var web = new HtmlAgilityPack.HtmlWeb();
                    var doc = web.Load(url);

                    // The first <h1> is the link title; fall back to the url when the page has none.
                    var nH1 = doc.DocumentNode.Descendants("h1").FirstOrDefault();
                    string title = nH1 != null ? nH1.InnerText : url;

                    //our HTMLs have marked sections with the 'shelp' class that we will use for indexing only
                    // Exactly one such element is expected; any other count skips the page (Take(2) is enough to tell).
                    var sections = doc.DocumentNode.Descendants()
                        .Where(x => x.Attributes["class"] != null && !string.IsNullOrEmpty(x.Attributes["class"].Value) && x.Attributes["class"].Value.Contains("shelp"))
                        .Take(2)
                        .ToList();
                    if (sections.Count != 1)
                    {
                        System.Diagnostics.Trace.TraceWarning("RunHelpCrawler: skipped '{0}': expected exactly one element with the 'shelp' class.", url);
                        continue;
                    }

                    // Gather the visible text only (no script/style content).
                    var buffer = new System.Text.StringBuilder();
                    var nodes = sections[0].Descendants()
                        .Where(n =>
                            n.NodeType == HtmlAgilityPack.HtmlNodeType.Text &&
                            n.ParentNode.Name != "script" &&
                            n.ParentNode.Name != "style");
                    foreach (var node in nodes)
                    {
                        buffer.Append(node.InnerText);
                    }
                    foreach (var oc in cToRemove)
                    {
                        buffer.Replace(oc, ' ');
                    }

                    string[] keywords = buffer.ToString().ToLowerInvariant().Split(cToSplit, StringSplitOptions.RemoveEmptyEntries);

                    // One entry per url: look it up once per page instead of once per keyword.
                    Models.Search item = search.FirstOrDefault(i => i.UrlTitle.Any(a => a.Item1 == url));
                    foreach (var keyword in keywords)
                    {
                        //words to skip indexing like: he, she, this, that...
                        if (keyword.Length <= 2 || toSkip.Contains(keyword))
                        {
                            continue;
                        }

                        if (item == null)
                        {
                            // The entry is created lazily so a page without any keyword is not indexed.
                            item = new Models.Search() { Keywords = "," + keyword + ",", UrlTitle = new List<Tuple<string, string>>() };
                            item.UrlTitle.Add(new Tuple<string, string>(url, title));
                            search.Add(item);
                        }
                        else if (!item.Keywords.Contains("," + keyword + ","))
                        {
                            item.Keywords = item.Keywords.TrimEnd(',') + "," + keyword + ",";
                        }
                    }
                }
                catch (Exception ex)
                {
                    // A broken page must not stop the whole crawl, but it should not vanish silently either.
                    System.Diagnostics.Trace.TraceError("RunHelpCrawler: failed to index '{0}': {1}", filePath, ex);
                }
            }

            // Collapse entries with an identical keyword list into the first one and keep every
            // page's link (simply taking the first entry would drop the other pages).
            List<Models.Search> merged = new List<Models.Search>();
            foreach (var group in search.GroupBy(g => g.Keywords))
            {
                Models.Search first = group.First();
                foreach (var other in group.Skip(1))
                {
                    foreach (var link in other.UrlTitle)
                    {
                        if (!first.UrlTitle.Any(a => a.Item1 == link.Item1))
                        {
                            first.UrlTitle.Add(link);
                        }
                    }
                }
                merged.Add(first);
            }

            using (System.IO.StreamWriter sw = new System.IO.StreamWriter(json, false))
            {
                sw.Write(SimpleJson.SimpleJson.SerializeObject(merged));
            }
        }
        catch (Exception ex)
        {
            System.Diagnostics.Trace.TraceError("RunHelpCrawler: indexing failed: {0}", ex);
        }
        finally
        {
            // Balance the Increment() made before the task was started.
            AsyncManager.OutstandingOperations.Decrement();
        }
    });

    return Content("ok");
}
Whenever you add a new help document, just re-run this simple crawler and it will re-index everything.
It does not matter which platform and language you use to generate the JSON; what matters is the JSON structure you end up with.
Here is the structure you need to generate so that the JavaScript code can recognize and use it properly:
[
{
"Keywords": "kword1,kword2,kword3",
"UrlTitle": [
{
"Item1": "https://www.yourwebsiteurl.com/help/page1",
"Item2": "Title of the page 1"
}
]
},
{
"Keywords": "kword1,kword2,kword3",
"UrlTitle": [
{
"Item1": "https://www.yourwebsiteurl.com/help/page2",
"Item2": "Title of the page 2"
}
]
},
{
"Keywords": "kword1,kword2,kword3",
"UrlTitle": [
{
"Item1": "https://www.yourwebsiteurl.com/help/page3",
"Item2": "Title of the page 3"
}
]
},
....
2. The UI and JavaScript parts that let users use this simple serverless search
UI
<!-- Help search box: the script looks the input up by its id "hlpSearch". -->
<div class="row">
    <div class="col-md-12">
        <div class="input-group">
            <span class="input-group-addon"
                  id="shlpSearch"
                  style="border: 1px solid #ccc;height: 26px;padding-top: 2px;padding-bottom: 2px;">Search</span>
            <input type="search"
                   id="hlpSearch"
                   class="form-control"
                   style="max-width:100%;height: 26px;padding: 6px;"
                   title="Search help"
                   placeholder="How to ...">
        </div>
    </div>
</div>
JS
<script>
// Search index; stays undefined until the JSON request below has completed.
var shelp;
// Original markup of the ".shelp" element, kept while search results are displayed.
var prevHtml = "";

// Fetch the index; the changing "antc" query value keeps the browser from serving a cached copy.
var indexUrl = "/shelp.json?antc=" + new Date().getTime();
$.getJSON(indexUrl, function (index) {
    shelp = index;
});
/**
 * Shows links to the help pages matching the query inside the ".shelp" element,
 * or restores the element's original content when there is nothing to search for.
 *
 * Pages whose keyword list contains every query term are listed first; pages that
 * contain only some of the terms follow after a horizontal line.
 *
 * @param {string} sh Raw text typed into the search box.
 */
function searchHelp(sh) {
    // The index stores lower-cased keywords, so the query is lower-cased as well.
    // Splitting on every run of commas/whitespace and dropping empty terms matters:
    // an empty term is "found" in every keyword list and would match all pages.
    var ss = [];
    if (shelp && typeof sh === "string" && sh.length > 2) {
        ss = $.grep(sh.toLowerCase().split(/[\s,]+/), function (term) {
            return term !== "";
        });
    }

    var box = $(".shelp");

    // Nothing to search for (or the index is not loaded yet): put the original content back.
    if (ss.length === 0) {
        if (prevHtml != "") {
            box.css("border", "");
            box.css("box-shadow", "");
            box.html(prevHtml);
            prevHtml = "";
        }
        return;
    }

    // Remember the original content once, before the first result list replaces it.
    if (prevHtml == "") {
        prevHtml = box.html();
    }

    var items = [];
    var lessitems = [];
    $.each(shelp, function (i) {
        // br = number of query terms found in this entry's keyword list
        var br = 0;
        for (var f = 0; f < ss.length; f++) {
            br += (shelp[i].Keywords.indexOf(ss[f]) >= 0) ? 1 : 0;
        }
        $.each(shelp[i].UrlTitle, function (ii) {
            var el = "<a class='label label-default' style='font-size:125%;line-height:2' href='" + shelp[i].UrlTitle[ii].Item1 + "'>" + shelp[i].UrlTitle[ii].Item2 + "</a>";
            //most relevant first
            if (br == ss.length) {
                if ($.inArray(el, items) === -1) {
                    items.unshift(el);
                }
            }
            else if (br > 0) { //less relevant but containing at least one keyword
                if ($.inArray(el, items) === -1 && $.inArray(el, lessitems) === -1) {
                    lessitems.push(el);
                }
            }
        });
    });

    if (lessitems.length > 0) {
        //split less relevant by horizontal line
        items.push("<hr style='margin: 0;padding: 0;'/>");
        $.each(lessitems, function (i) {
            items.push(lessitems[i]);
        });
    }

    box.css("border", "1px solid");
    box.css("box-shadow", "0 0 8px 1px");
    box.html(items.join("<br/>"));
}
// Once the DOM is ready, re-run the search on every event that can change the search box.
$(function () {
    var triggers = "keyup mouseup input search touchend";
    $("#hlpSearch").on(triggers, function () {
        var query = $(this).val();
        searchHelp(query);
    });
});
</script>
So, we store the previous HTML and insert newly generated HTML with the search results. The most relevant items, those matching every keyword in the query, come first. A horizontal line separates the most relevant items from the less relevant ones.
You can add more specific logic, for example to handle plural forms or to take keyword order into account, but as a simple and fast search this will be more than enough.
Thank you and see you
Updated JavaScript version (2020):
/**
 * Shows links to the help pages matching the query inside the ".shelp" element,
 * or restores the element's original content when there is nothing to search for.
 *
 * Result order: pages whose title contains a query term, then pages whose keyword
 * list contains every term, then (under a "less relevant" separator) pages that
 * contain only some of the terms.
 *
 * @param {string} sh Raw text typed into the search box.
 */
function searchHelp(sh) {
    // Terms of three characters or fewer are ignored (same threshold as before: length > 3).
    var minTermLength = 4;

    // The index stores lower-cased keywords, so the query is lower-cased as well;
    // it is split on every run of commas/whitespace.
    var ss = [];
    if (shelp && typeof sh === "string" && sh.length > 2) {
        ss = $.grep(sh.toLowerCase().split(/[\s,]+/), function (term) {
            return term.length >= minTermLength;
        });
    }

    var box = $(".shelp");

    // No usable term (or the index is not loaded yet): put the original content back.
    // Without this guard an empty term list would satisfy "all terms matched" for every page.
    if (ss.length === 0) {
        if (prevHtml != "") {
            box.css("border", "");
            box.css("box-shadow", "");
            box.html(prevHtml);
            prevHtml = "";
        }
        return;
    }

    // Remember the original content once, before the first result list replaces it.
    if (prevHtml == "") {
        prevHtml = box.html();
    }

    var topitems = [];
    var items = [];
    var lessitems = [];
    $.each(shelp, function (i) {
        // br = number of query terms found in this entry's keyword list
        var br = 0;
        for (var f = 0; f < ss.length; f++) {
            br += (shelp[i].Keywords.indexOf(ss[f]) >= 0) ? 1 : 0;
        }
        $.each(shelp[i].UrlTitle, function (ii) {
            var link = shelp[i].UrlTitle[ii];
            var el = "<a class='label label-default' " +
                "style='font-size:125%;line-height:2;width:100%;display:inline-flex;'" +
                " href='" + link.Item1 + "'>" +
                link.Item2 + "</a>";

            // Plain case-insensitive substring test; building a RegExp from user input
            // would throw on characters such as "(" or "+".
            var title = String(link.Item2).toLowerCase();
            var inTitle = false;
            for (var t = 0; t < ss.length && !inTitle; t++) {
                inTitle = title.indexOf(ss[t]) >= 0;
            }

            if (inTitle) {
                if ($.inArray(el, topitems) === -1) {
                    topitems.push(el);
                }
            }
            else if (br == ss.length) {
                if ($.inArray(el, items) === -1) {
                    items.unshift(el);
                }
            }
            else if (br > 0) {
                if ($.inArray(el, items) === -1 && $.inArray(el, lessitems) === -1) {
                    lessitems.push(el);
                }
            }
        });
    });

    // Title matches go in front of the keyword matches.
    $.each(topitems, function (i) {
        items.unshift(topitems[i]);
    });

    if (lessitems.length > 0) {
        items.push("<hr style='margin: 0;padding: 0;'/>" +
            "<span style='font-size: 65%;padding-left: 1%'>less relevant</span>");
        $.each(lessitems, function (i) {
            items.push(lessitems[i]);
        });
    }

    box.css("border", "1px solid");
    box.css("box-shadow", "0 0 8px 1px");
    box.html("<div class='ishelp'>" + items.join("<br/>") + "</div>");
}
This update puts items whose title contains one of the keywords at the top of the list.
Thank you

1vqHSTrq1GEoEF7QsL8dhmJfRMDVxhv2y