And now, list and count chunks of words.
--
--[SCRIPT countgroups_ofwords]
(*
Enregistrer ce script en tant que script (sortand_countwords.scpt) ou progiciel (sortand_countwords.app).
Exécuter ce script ou
déposer l'icône d'un fichier texte (xxx.txt) sur l'icône de sortand_countwords.app.
Le script lit le fichier
compte les groupes de mots du texte.
Le résultat est enregistré sur le bureau dans "countedGroupsOfWords.txt".
Il est alors possible de l'exploiter à loisir à l'aide d'un tableur.
***********
Save the script as script (sortand_countwords.scpt) or application bundle (sortand_countwords.app).
Run it or drag and drop a text file icon (xxx.txt) on the sortand_countwords.app 's icon.
The script reads the file
and counts the groups of consecutive words found in the text
(every run of adjacent words within a line, with its number of occurrences).
The result is stored on the desktop in "countedGroupsOfWords.txt".
Then it's easy to treat it in a spreadsheet.
***********
Yvan KOENIG (VALLAURIS, France)
2009/09/21
*)
-- Name of the report file created on the desktop by commun().
property nomDuRapport : "countedGroupsOfWords.txt"
property rapport : "" -- global; not referenced by the handlers visible in this script
property liste1 : {} -- paragraphs of the text being analysed
property liste2 : {} -- "string<tab>count" records already emitted (used to skip duplicates)
property liste3 : {} -- lines of the final report: "#<tab>string<tab>count<tab>nb words"
--=====
on run
	(* Lines executed when the script is run, or when the application
	script's icon is double-clicked.
	The user is asked to pick a plain-text file; a path hard-coded to one
	particular disk would fail on every other machine.
	Cancelling the dialog raises the usual "user canceled" error (-128),
	which simply stops the script. *)
	set fichier to choose file of type {"public.plain-text"} without invisibles
	-- commun() expects a list of aliases, exactly like the one "on open" receives
	my commun({fichier})
end run
--=====
on open (sel)
	(* sel holds a list of aliases: the items that were dropped on the
	application script's icon (the selection).
	The whole list is handed to the shared worker handler. *)
	tell me to commun(sel)
end open
--=====
on commun(elems)
	(* Count every group of consecutive words found in a text file and write
	a tab-separated report on the desktop.
	
	elems : list of aliases; only item 1 is processed.
	
	Report layout (one line per distinct group, preceded by a header line):
	#  <tab>  string  <tab>  count  <tab>  nb words
	
	Occurrences are counted by splitting the whole text on the group, i.e. as
	plain substring matches, so a group may also match inside longer words. *)
	my nettoie()
	set theDoc to item 1 of elems
	set enTexte to read theDoc
	-- drop commas and treat hyphens as word separators
	set enTexte to my supprime(enTexte, ",")
	set enTexte to my remplace(enTexte, "-", " ")
	set my liste1 to paragraphs of enTexte
	(* Convert to lowercase, one paragraph per shell call.
	The double quotes surrounding the Python code must be escaped inside the
	AppleScript string literal, otherwise the script does not compile.
	NOTE: relies on a Python 2 interpreter being available at /usr/bin/python. *)
	set lowerCmd to "/usr/bin/python -c \"import sys; print unicode(sys.argv[1], 'utf8').lower().encode('utf8')\" "
	repeat with i from 1 to count of my liste1
		set item i of my liste1 to do shell script lowerCmd & quoted form of (item i of my liste1)
	end repeat
	set enTexte to my recolle(my liste1, return)
	set my liste2 to {"string" & tab & "count"}
	-- header: one tab between every column, so it lines up with the data rows
	set my liste3 to {"#" & tab & "string" & tab & "count" & tab & "nb words"}
	set l to 0 -- running number of distinct groups reported so far
	ignoring case
		repeat with i from 1 to count of my liste1
			set wordsI to words of item i of my liste1
			set nbWords to count of wordsI
			repeat with j from 1 to nbWords
				-- longest group first; k never goes below j, so items j thru k is always a valid range
				repeat with k from nbWords to j by -1
					set strIJK to my recolle(items j thru k of wordsI, " ")
					-- splitting on the group yields (occurrences + 1) pieces
					set nbr to (count of my decoupe(enTexte, strIJK)) - 1
					set rec to strIJK & tab & nbr
					if rec is not in my liste2 then
						set l to l + 1
						copy rec to end of my liste2
						-- k + 1 - j is the number of words in the group
						copy (l as text) & tab & rec & tab & (k + 1 - j) to end of my liste3
					end if
				end repeat -- k
			end repeat -- j
		end repeat -- i
	end ignoring
	set enTexte to my recolle(my liste3, return)
	set p2d to path to desktop
	set p2r to (p2d as Unicode text) & nomDuRapport
	-- start from a fresh, empty report file
	tell application "System Events"
		if exists (file p2r) then delete (file p2r)
		make new file at end of p2d with properties {name:nomDuRapport}
	end tell
	write enTexte to (p2r as alias)
	-- release the (potentially large) lists kept in the script's properties
	my nettoie()
end commun
--=====
on nettoie()
	-- Reset the three working lists stored in the script's properties,
	-- so nothing is carried over from (or saved after) a run.
	set my liste3 to {}
	set my liste2 to {}
	set my liste1 to {}
end nettoie
--=====
on decoupe(t, d)
	-- Split text t on delimiter d and return the list of pieces.
	-- The text item delimiters are left set to "" afterwards.
	local pieces
	set AppleScript's text item delimiters to d
	set pieces to every text item of t
	set AppleScript's text item delimiters to ""
	return pieces
end decoupe
--=====
on remplace(t, d1, d2)
	-- Return a copy of text t in which every occurrence of d1 is replaced by d2.
	-- Works by splitting on d1 and joining the pieces again with d2;
	-- the text item delimiters are left set to "" afterwards.
	local pieces, rebuilt
	set AppleScript's text item delimiters to d1
	set pieces to every text item of t
	set AppleScript's text item delimiters to d2
	set rebuilt to pieces as text
	set AppleScript's text item delimiters to ""
	return rebuilt
end remplace
--=====
on recolle(l, d)
	-- Join the items of list l into a single text, inserting d between items.
	-- The text item delimiters are left set to "" afterwards.
	local joined
	set AppleScript's text item delimiters to d
	set joined to l as text
	set AppleScript's text item delimiters to ""
	return joined
end recolle
--=====
on supprime(t, d)
	-- Return a copy of text t with every occurrence of d removed.
	-- The text is split on d, then the pieces are glued back together
	-- with an empty delimiter.
	local pieces, stripped
	set AppleScript's text item delimiters to d
	set pieces to every text item of t
	set AppleScript's text item delimiters to ""
	set stripped to pieces as text
	return stripped
end supprime
--=====
on sort_list(unsortedList)
	-- Sort a list of texts with the shell's sort(1) tool
	-- (-d : dictionary order, -f : fold lower case to upper case)
	-- and return the sorted items as a new list.
	-- Items are joined with LF so that each one is a separate input line.
	set AppleScript's text item delimiters to linefeed
	set joinedItems to unsortedList as string
	set shellCmd to "echo " & quoted form of joinedItems & "| sort -d -f"
	set sortedList to paragraphs of (do shell script shellCmd)
	set AppleScript's text item delimiters to ""
	return sortedList
end sort_list
--=====
--[/SCRIPT]
--
Yvan KOENIG (VALLAURIS, France) lundi 21 septembre 2009 21:28:46