First import

2017-01-06 12:41:43 +00:00 · 2017-01-06 12:41:43 +00:00 · 4f08d44df3
commit 4f08d44df3
3 changed files with 156089 additions and 0 deletions
--- a/cercasogno.rb
+++ b/cercasogno.rb
@ -0,0 +1,294 @@
+#!/usr/bin/env ruby
+#coding: utf-8
+
+require 'pathname'
+$APP_PATH = File.join(File.dirname(Pathname.new(__FILE__).realpath), "/")
+
+require 'yaml'
+#require 'set'
+
+begin
+	require 'Win32/Console/ANSI' if RUBY_PLATFORM =~ /win32/
+	$COLORIZING = true
+rescue LoadError
+	$COLORIZING = false
+end
+
+=begin
+Start of code copied from cercasogno_dizmaker.rb.
+Note: the following code must never be changed!
+=end
+DREAM_INFO = Struct.new("DREAM_INFO", :strDesc, :strMeaning, :nNumber, :aSubDescs, :strReferenceURL)
+=begin
+end of code copied from cercasogno_dizmaker.rb
+=end
+
+CConsoleInfo = Struct.new(:nRows, :nColumns, :bAutoReturn)
+
+class CIndexer
+	def initialize(hData)
+		@hExpanded = Hash.new
+		hData.each do |strCompactIndex, value|
+			raise "Tutti gli indici per questo dizionario devono essere di tipo String ma è stato trovato un oggetto di classe #{strCompactIndex.class}: \"#{strCompactIndex.inspect}\"" unless strCompactIndex.is_a? String
+			raise "Tutti gli indici per questo dizionario devono essere stringhe con lunghezza almeno 1" if strCompactIndex.length < 1
+			getExpandedIndex(strCompactIndex).each do |strExp|
+				raise "Un elemento precedentemente incontrato è già stato espanso in \"#{strExp}\"" if @hExpanded.include? strExp
+				@hExpanded[strExp] = value
+			end
+		end
+		#puts @hExpanded.inspect
+	end
+
+	def [](index)
+		@hExpanded[index]
+	end
+
+	def getExactKeysByGuessing(strKey)
+		if @hExpanded.include? strKey
+			return [@hExpanded[strKey]]
+		else
+			#if the easy part didn't work, we need to put some extra effort in the search
+			return getListOfCandidates(strKey)
+		end
+	end
+
+private
+	def getExpandedIndex(strContracted)
+		aSplit = strContracted.split(/[\\\/-]/).collect {|strItem| strItem.strip}.select {|strItem| strItem.length > 0}
+		return [strContracted] if aSplit.length <= 1
+
+		aSuffixes = aSplit[1..-1]
+		nSuffixLen = aSuffixes.inject(aSuffixes.first.length) do |memo, strSuffix|
+			nLen = strSuffix.length();
+			strSuffix.length().between?(1, memo) ? strSuffix.length() : memo
+		end
+
+		strBase = aSplit.first
+		nSuffixLen = strBase.length if strBase.length < nSuffixLen
+		aSplit[0] = strBase[strBase.length - nSuffixLen..-1]
+		strBase = (strBase.length > nSuffixLen ? strBase[0..(strBase.length - nSuffixLen - 1)] : "")
+		aSplit.collect {|strSuffix| strBase + strSuffix}
+	end
+
+	def getListOfCandidates(strSearch)
+		regSearch = /(?i)#{CIndexer::duplicateAccentedForRegex(strSearch)}/
+		getListOfCandidatesByRegex(regSearch)
+	end
+
+	def getListOfCandidatesByRegex(regSearch)
+		aRet = Array.new
+		@hExpanded.each_key do |strKey|
+			aRet << strKey if regSearch =~ strKey
+		end
+		aRet
+	end
+
+	def self.duplicateAccentedForRegex(str)
+		aAccents = [
+			["a", "á", "à", "â", "ä", "ã"],
+			["A", "Ã", "Ä", "Â", "À", "Á"],
+			["e", "é", "è", "ê", "ë"],
+			["E", "Ë", "É", "È", "Ê"],
+			["i", "í", "ì", "î", "ï"],
+			["I", "Í", "Î", "Ì", "Ï"],
+			["o", "ó", "ò", "ô", "ö", "õ"],
+			["O", "Õ", "Ö", "Ô", "Ò", "Ó"],
+			["u", "ú", "ù", "û", "ü"],
+			["U", "Ú", "Û", "Ù", "Ü"],
+			["c", "ç"], ["C", "Ç"],
+			["n", "ñ"], ["N", "Ñ"]
+		]
+		#sAccentedMix = Set.new(aAccents.flatten)
+		hAccentedMix = Hash.new
+		aAccents.each_index do |z|
+			aAccents[z].each {|strLetter| hAccentedMix[strLetter] = z}
+		end
+
+		strRet = ""
+		str.each_char do |strLetter|
+			if hAccentedMix.include? strLetter then
+				strRet += "[" + aAccents[hAccentedMix[strLetter]].join("|") + "]"
+			else
+				strRet += strLetter
+			end
+		end
+		return strRet
+	end
+end
+
+class CAnsiColorizer
+	@@regStartsByDigit = /^\d/
+
+	def initialize(bEnabled=nil)
+		@bEnabled = (bEnabled.nil? ? $COLORIZING : bEnabled)
+		@hTemplates = Hash.new
+		@hRegexToColor = Hash.new
+	end
+
+	def addTemplate(strName, nForeground, nBackground=nil, nMain=0)
+		raise "Template names cannot start by a number" if strName.is_a?(String) && (@@regStartsByDigit =~ strName)
+		@hTemplates[strName] = [nMain, nForeground, nBackground].compact.join(";")
+		return true
+	end
+
+	def getColorized(strText, color)
+		if color.is_a?(Numeric) || color.is_a?(String) && (@@regStartsByDigit =~ color) then
+			return colorize(strText, color.to_s)
+		else
+			if @hTemplates.include?(color) then
+				return colorize(strText, @hTemplates[color])
+			else
+				return strText
+			end
+		end
+	end
+
+	def addAutoColorRegex(regRegex, color)
+		raise "Invalid regex" unless regRegex.is_a? Regexp
+		@hRegexToColor[regRegex] = color
+		true
+	end
+
+	def getAutoColorized(strText)
+		strRet = strText
+		@hRegexToColor.each do |regMatch, color|
+			m = regMatch.match(strText)
+			nFrom = 0
+			while m do
+				nOldLen = strRet.length
+				strRet[(nFrom+m.begin(0))..(nFrom+m.end(0)-1)] = self.getColorized(m[0], color)
+				nFrom += m.end(0) + strRet.length - nOldLen
+				m = regMatch.match(strRet[nFrom..-1])
+			end
+		end
+		strRet
+	end
+private
+	def colorize(text, color_code)
+		#source: http://kpumuk.info/ruby-on-rails/colorizing-console-ruby-script-output/
+		@bEnabled ? "\033[#{color_code}m#{text}\033[0m" : text
+	end
+end
+
+def GetConsoleInfo()
+	CConsoleInfo.new(40, 80, false)
+end
+
+def PutsMulticolumnFixedLength(aList, colorizer, nWidth=nil, bVerticalOrder=true, nForceMinSpacing=1)
+	return 0 if aList.empty?
+	nLen = aList.first.length
+	strEOL = "\n"
+	unless nWidth then
+		ciInfo = GetConsoleInfo()
+		strEOL = "" if ciInfo.bAutoReturn
+		nWidth = ciInfo.nColumns
+	end
+
+	raise "Gli elementi devono avere lunghezza maggiore di 0" if nLen == 0
+	raise "Gli elementi non devono essere più lunghi dello spazio disponibile (#{nLen} > #{nWidth})" if nLen > nWidth
+	nColumns = (nWidth < nLen + nForceMinSpacing ? 1 : nWidth / (nLen + nForceMinSpacing) )
+	strSpacing = " " * ((nWidth - nLen * nColumns) / nColumns)
+	#raise "assert" if strSpacing.length < nForceMinSpacing
+	nBlockLen = strSpacing.length + nLen
+	#assert strSpacing >= 1
+
+	if !bVerticalOrder then
+		nCount = nColumns
+		(aList.length / nColumns).times do |z|
+			nFrom = z * nColumns
+			nCount = aList.length - nFrom if aList.length - nFrom < nCount
+			print colorizer.getAutoColorized(aList[nFrom..(nFrom + nCount - 1)].collect {|s| s + strSpacing}.join("")) + strEOL
+		end
+	else
+		nCount = nColumns
+		nRows = aList.length / nColumns
+		nLongerColumns = aList.length % nColumns
+		((aList.length + nColumns - 1) / nColumns).times do |z|
+			nFrom = z * nColumns
+			nCount = aList.length - nFrom if aList.length - nFrom < nCount
+			aIndices = Array.new(nCount) {|i| i * nRows + [nLongerColumns, i].min + z}
+			print colorizer.getAutoColorized(aList.values_at(*aIndices).collect {|s| s.to_s + strSpacing}.join("")) + strEOL
+		end
+	end
+end
+
+def Disambiguate(aList, colorizer)
+	raise "Ricevuta una lista vuota" if aList.empty?
+	return 0 if aList.length == 1
+
+	puts "Specifica meglio il termine della ricerca:"
+
+	nLongestEntry = aList.inject(aList.first.length) do |memo, strItem|
+		raise "Tutti gli elementi ricevuti devono essere di tipo String, ma è stato trovato un #{strItem.class}" unless strItem.is_a? String
+		(strItem.length > memo ? strItem.length : memo)
+	end
+
+	strSeparator = " - "
+	nLongestID = aList.length.to_s.length
+	nRequiredCols = nLongestID + nLongestEntry + strSeparator.length
+
+	aMenu = Array.new
+	aList.each_index do |z|
+		aMenu << (z + 1).to_s.ljust(nLongestID, " ") + strSeparator + aList[z].rjust(nLongestEntry, " ")
+	end
+	PutsMulticolumnFixedLength(aMenu, colorizer, nil, true, 2)
+
+	bDone = false
+	nIndex = 0
+	regInt = /^\d{1,3}$/
+	until bDone do
+		strInp = $stdin.gets.chomp.strip
+		if regInt =~ strInp then
+			nIndexMaybe = strInp.to_i
+			if nIndexMaybe.between?(0, aList.length) then
+				nIndex = nIndexMaybe
+				bDone = true
+			end
+		end
+	end
+	nIndex - 1
+end
+
+def DrawMeaning(objDream, colorizer)
+	raise "objDream can't be null" if objDream.nil?
+	#:strDesc, :strMeaning, :nNumber, :aSubDescs, :strReferenceURL
+	puts "#{colorizer.getColorized(objDream.strDesc.capitalize, :main)} (#{colorizer.getColorized(objDream.nNumber, :numeric)}):"
+	puts objDream.strMeaning if objDream.strMeaning && objDream.strMeaning.length > 0
+	if objDream.aSubDescs then
+		aMeanings = Array.new
+		objDream.aSubDescs.each do |objSubDesc|
+			aMeanings << "#{colorizer.getColorized(objSubDesc.strDesc, :desc)}: #{colorizer.getColorized(objSubDesc.strMeaning, :normal)}; #{colorizer.getColorized(objSubDesc.nNumber, :numeric)}"
+		end
+		puts aMeanings.sort.join("\n")
+	end
+	nil
+end
+
+if ARGV.length == 0 then
+	puts "Specificare il tema della ricerca"
+	exit
+end
+
+#hDreams = YAML::load(File.read(File.join($APP_PATH, "cercasogno_diz.yml")))
+hDreams = nil
+File.open(File.join($APP_PATH, "cercasogno_diz.yml"), 'r') {|fh| hDreams = YAML.load(fh) }
+#puts hDreams.length
+indexer = CIndexer.new(hDreams)
+
+colorizer = CAnsiColorizer.new
+colorizer.addAutoColorRegex(/\d+/, :numeric)
+
+colorizer.addTemplate(:numeric, 36)
+colorizer.addTemplate(:main, 31, nil, 4)
+colorizer.addTemplate(:desc, 33)
+
+aGuess = indexer.getExactKeysByGuessing(ARGV.first)
+nSelection = 0
+if aGuess.length == 0 then
+	puts "Nessun risultato per \"#{ARGV.first}\""
+	exit
+elsif aGuess.length > 1 then
+	nSelection = Disambiguate(aGuess, colorizer)
+end
+
+DrawMeaning(indexer[aGuess[nSelection]], colorizer)
--- a/cercasogno_diz.yml
+++ b/cercasogno_diz.yml
--- a/cercasogno_dizmaker.rb
+++ b/cercasogno_dizmaker.rb
@ -0,0 +1,129 @@
+#!/usr/bin/env ruby
+#coding: utf-8
+
+require 'pathname'
+$APP_PATH = File.join(File.dirname(Pathname.new(__FILE__).realpath), "/")
+
+require 'rubygems'
+require 'net/http'
+require 'nokogiri'
+require 'yaml'
+
+DREAM_INFO = Struct.new(:strDesc, :strMeaning, :nNumber, :aSubDescs, :strReferenceURL)
+
+def DropIntermediateAccents(str)
+	accents = {
+		[/á\B/, /à\B/, /â\B/, /ä\B/, /ã\B/] => 'a',
+		[/Ã\B/, /Ä\B/, /Â\B/, /À\B/, /Á\B/] => 'A',
+		[/é\B/, /è\B/, /ê\B/, /ë\B/] => 'e',
+	    [/Ë\B/, /É\B/, /È\B/, /Ê\B/] => 'E',
+		[/í\B/, /ì\B/, /î\B/, /ï\B/] => 'i',
+	    [/Í\B/, /Î\B/, /Ì\B/, /Ï\B/] => 'I',
+		[/ó\B/, /ò\B/, /ô\B/, /ö\B/, /õ\B/] => 'o',
+	    [/Õ\B/, /Ö\B/, /Ô\B/, /Ò\B/, /Ó\B/] => 'O',
+		[/ú\B/, /ù\B/, /û\B/, /ü\B/] => 'u',
+	    [/Ú\B/, /Û\B/, /Ù\B/, /Ü\B/] => 'U',
+		[/ç/] => 'c', [/Ç/] => 'C',
+	    [/ñ/] => 'n', [/Ñ/] => 'N'
+	}
+	strRet = str.to_s()
+	accents.each do |ac,rep|
+		ac.each do |s|
+			strRet.gsub!(s, rep)
+		end
+	end
+	return strRet #.gsub(/[^\w\s.:,;@#§\[\]()=?!^"£$%&@°\\\/=*+-]\B/, "")
+end
+
+def GetPurifiedString(strText)
+	strRet = DropIntermediateAccents(strText.strip())
+	#strRet = strText.strip()
+	hCleaning = {"" => /\r|\n/, " " => /\s{2,}/, "'" => "’"}
+	hCleaning.each do |strReplace, regMatch|
+		strRet.gsub! regMatch, strReplace
+	end
+	return strRet.strip
+end
+
+# Splits a details line into its "description, meaning, number" triples and
+# returns them as an array of DREAM_INFO (strDesc, strMeaning, nNumber), or nil
+# when the line contains none.
+#
+# Descriptions and meanings containing punctuation or digits are reported on
+# $stderr as suspicious. An empty or purely numeric description/meaning is
+# reported as well and then aborts with RuntimeError "Critical error".
+def ExtractSubsections(strSectionRaw)
+	aSubsections = Array.new
+
+	regSubElement = /^\W*([^0-9;:,.-]+)[\s;:,.-]+([^0-9;:=-]+?)\b\W+(\d+)/
+	regInteger = /^\d+$/
+	regSuspicious = /[-.:,;@#§+\\!"£$%&()=?^°\[\]*0-9]/
+
+	# The pattern is anchored, so it is applied again and again to what is left
+	# of the line after the previous match.
+	nStart = 0
+	mSubElement = regSubElement.match(strSectionRaw)
+	while mSubElement do
+		diNew = DREAM_INFO.new(GetPurifiedString(mSubElement[1]), GetPurifiedString(mSubElement[2]), mSubElement[3].to_i())
+		aSubsections << diNew
+		# Plain true/false: a raw =~ result (Integer or nil) must not be
+		# combined with booleans through "|".
+		bCriticalError = [diNew.strMeaning, diNew.strDesc].any? do |strField|
+			strField.empty? || !(regInteger =~ strField).nil?
+		end
+		$stderr.puts "M: " + diNew.strMeaning if bCriticalError || regSuspicious =~ diNew.strMeaning
+		$stderr.puts "D: " + diNew.strDesc if bCriticalError || regSuspicious =~ diNew.strDesc
+		raise "Critical error" if bCriticalError
+		nStart += mSubElement.end(0)
+		mSubElement = regSubElement.match(strSectionRaw[nStart..-1])
+	end
+	aSubsections.empty? ? nil : aSubsections
+end
+
+def ExtractElementsFromHtml(strReferenceUrl, strHtml)
+	#The following query fetches <br> nodes from parent node "table" (ref: http://stackoverflow.com/questions/1485356/how-to-get-xpath-of-text-between-br-or-br)
+	strXPathBR = "//table[@class=\"centraleUnico\"]/br/following-sibling::text() | //table[@class=\"centraleUnico\"]//br/preceding-sibling::text()"
+
+	regHead = /^(.+?)\b.*=\s*(\d+)/
+	regDetails = /^.+?:.+?,\s*\d+/
+
+	bExpectingDetails = false
+	diPrevItem = nil
+	hRet = Hash.new
+	doc = Nokogiri::HTML(strHtml)
+	doc.xpath(strXPathBR).each do |link|
+		strText = GetPurifiedString(link.content)
+		bMatchHead, bMatchDetails = regHead =~ strText, regDetails =~ strText
+		next unless bMatchHead || bMatchDetails
+
+		bExpectingDetails = diPrevItem && bMatchDetails
+
+		if bExpectingDetails then
+		  diPrevItem.aSubDescs = ExtractSubsections(strText)
+# 		  raise "An item's details were expected" unless mExtract
+		else
+			mExtract = regHead.match(strText)
+			raise "A new item was expected" unless mExtract
+			diPrevItem = DREAM_INFO.new(mExtract[1], nil, mExtract[2].to_i(), nil, strReferenceUrl)
+			hRet[diPrevItem.strDesc] = diPrevItem
+			print diPrevItem.strDesc + " "
+		end
+		bExpectingDetails = !bExpectingDetails
+	end
+
+# 	strHtml.force_encoding("iso-8859-1").encode("UTF-8")
+	return hRet
+end
+
+# Entry point: download one page per initial letter, collect the entries of
+# all pages and dump them as YAML next to this script.
+aLetters = %w{a b c d e f g h i l m n o p q r s t u v z}
+
+HOST_URL = "www.metropolino.com"
+hElements = Hash.new
+conn = Net::HTTP.new(HOST_URL, 80)
+aLetters.each do |strLetter|
+	strPageAddress = "/smorfia/interpretazione-dei-sogni-" + strLetter + ".asp"
+	# Net::HTTP#get returns the response object only; the page source is
+	# its body.
+	response = conn.get(strPageAddress)
+	if response.is_a? Net::HTTPSuccess then
+		hElements.merge! ExtractElementsFromHtml(HOST_URL + strPageAddress, response.body)
+	else
+		puts "Error while retrieving #{strPageAddress} (Error #{response.code}: #{response.message})"
+		response.error!
+	end
+end
+puts
+
+# NOTE(review): cercasogno.rb reads "cercasogno_diz.yml", not "sogno_diz.yml";
+# confirm whether the generated file is meant to be renamed by hand.
+File.open(File.join($APP_PATH, "sogno_diz.yml"), "w") do |fDst|
+	fDst.write(YAML::dump(hElements))
+end